{ "best_global_step": 264710, "best_metric": 0.870147919438821, "best_model_checkpoint": "T:\\laupodteam\\AIOS\\Bram\\language_modeling\\Models\\language_models\\CardioCCC\\EuroBERT\\multilabel_3ldense_20epochs_40splits/fold_0\\checkpoint-264710", "epoch": 10.0, "eval_steps": 500, "global_step": 264710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009670960673945073, "grad_norm": 1.4626084566116333, "learning_rate": 1.9980733633032377e-05, "loss": 1.326, "step": 256 }, { "epoch": 0.019341921347890145, "grad_norm": 2.9422495365142822, "learning_rate": 1.9961391711684486e-05, "loss": 0.9436, "step": 512 }, { "epoch": 0.029012882021835214, "grad_norm": 3.939150333404541, "learning_rate": 1.9942049790336598e-05, "loss": 0.8108, "step": 768 }, { "epoch": 0.03868384269578029, "grad_norm": 3.260474681854248, "learning_rate": 1.9922707868988706e-05, "loss": 0.746, "step": 1024 }, { "epoch": 0.04835480336972536, "grad_norm": 2.8487510681152344, "learning_rate": 1.9903365947640814e-05, "loss": 0.6876, "step": 1280 }, { "epoch": 0.05802576404367043, "grad_norm": 3.7067031860351562, "learning_rate": 1.9884024026292926e-05, "loss": 0.6491, "step": 1536 }, { "epoch": 0.06769672471761551, "grad_norm": 3.1346232891082764, "learning_rate": 1.9864682104945038e-05, "loss": 0.6117, "step": 1792 }, { "epoch": 0.07736768539156058, "grad_norm": 3.9794366359710693, "learning_rate": 1.9845340183597147e-05, "loss": 0.584, "step": 2048 }, { "epoch": 0.08703864606550565, "grad_norm": 3.7597267627716064, "learning_rate": 1.9825998262249255e-05, "loss": 0.5541, "step": 2304 }, { "epoch": 0.09670960673945073, "grad_norm": 3.669264078140259, "learning_rate": 1.9806656340901363e-05, "loss": 0.5273, "step": 2560 }, { "epoch": 0.10638056741339579, "grad_norm": 3.203001022338867, "learning_rate": 1.9787314419553475e-05, "loss": 0.5148, "step": 2816 }, { "epoch": 0.11605152808734086, "grad_norm": 4.039089202880859, "learning_rate": 1.9767972498205587e-05, "loss": 0.4907, "step": 3072 }, { "epoch": 0.12572248876128594, "grad_norm": 3.2784998416900635, "learning_rate": 1.9748630576857695e-05, "loss": 0.4825, "step": 3328 }, { "epoch": 0.13539344943523102, "grad_norm": 3.981826066970825, "learning_rate": 1.9729288655509804e-05, "loss": 0.4639, "step": 3584 }, { "epoch": 0.1450644101091761, "grad_norm": 4.3725690841674805, "learning_rate": 1.9709946734161916e-05, "loss": 0.4522, "step": 3840 }, { "epoch": 0.15473537078312116, "grad_norm": 4.14705753326416, "learning_rate": 1.969068036719429e-05, "loss": 0.4452, "step": 4096 }, { "epoch": 0.16440633145706623, "grad_norm": 3.336223840713501, "learning_rate": 1.96713384458464e-05, "loss": 0.4292, "step": 4352 }, { "epoch": 0.1740772921310113, "grad_norm": 4.36456823348999, "learning_rate": 1.9651996524498508e-05, "loss": 0.4108, "step": 4608 }, { "epoch": 0.18374825280495638, "grad_norm": 3.929685354232788, "learning_rate": 1.9632654603150617e-05, "loss": 0.403, "step": 4864 }, { "epoch": 0.19341921347890145, "grad_norm": 4.6235671043396, "learning_rate": 1.961331268180273e-05, "loss": 0.3899, "step": 5120 }, { "epoch": 0.2030901741528465, "grad_norm": 4.0524492263793945, "learning_rate": 1.959397076045484e-05, "loss": 0.3913, "step": 5376 }, { "epoch": 0.21276113482679157, "grad_norm": 3.8945560455322266, "learning_rate": 1.957462883910695e-05, "loss": 0.3781, "step": 5632 }, { "epoch": 0.22243209550073664, "grad_norm": 4.181601047515869, "learning_rate": 1.9555286917759057e-05, "loss": 0.3758, "step": 5888 }, { "epoch": 0.23210305617468172, "grad_norm": 5.5385308265686035, "learning_rate": 1.9536020550791436e-05, "loss": 0.3646, "step": 6144 }, { "epoch": 0.2417740168486268, "grad_norm": 5.160412311553955, "learning_rate": 1.9516678629443545e-05, "loss": 0.3572, "step": 6400 }, { "epoch": 0.2514449775225719, "grad_norm": 6.071929931640625, "learning_rate": 1.9497336708095653e-05, "loss": 0.3453, "step": 6656 }, { "epoch": 0.26111593819651696, "grad_norm": 4.409694671630859, "learning_rate": 1.947799478674776e-05, "loss": 0.3504, "step": 6912 }, { "epoch": 0.27078689887046203, "grad_norm": 4.06134033203125, "learning_rate": 1.9458652865399873e-05, "loss": 0.3404, "step": 7168 }, { "epoch": 0.2804578595444071, "grad_norm": 3.8032515048980713, "learning_rate": 1.9439310944051982e-05, "loss": 0.34, "step": 7424 }, { "epoch": 0.2901288202183522, "grad_norm": 4.95781135559082, "learning_rate": 1.9419969022704094e-05, "loss": 0.3249, "step": 7680 }, { "epoch": 0.29979978089229725, "grad_norm": 3.898789167404175, "learning_rate": 1.9400627101356202e-05, "loss": 0.3218, "step": 7936 }, { "epoch": 0.3094707415662423, "grad_norm": 4.6803717613220215, "learning_rate": 1.9381285180008314e-05, "loss": 0.3144, "step": 8192 }, { "epoch": 0.3191417022401874, "grad_norm": 6.2057600021362305, "learning_rate": 1.936201881304069e-05, "loss": 0.3056, "step": 8448 }, { "epoch": 0.32881266291413247, "grad_norm": 4.742001056671143, "learning_rate": 1.9342676891692798e-05, "loss": 0.3031, "step": 8704 }, { "epoch": 0.33848362358807754, "grad_norm": 5.605335235595703, "learning_rate": 1.9323334970344906e-05, "loss": 0.2989, "step": 8960 }, { "epoch": 0.3481545842620226, "grad_norm": 5.512292861938477, "learning_rate": 1.9303993048997015e-05, "loss": 0.2838, "step": 9216 }, { "epoch": 0.3578255449359677, "grad_norm": 5.346792697906494, "learning_rate": 1.9284651127649127e-05, "loss": 0.2871, "step": 9472 }, { "epoch": 0.36749650560991276, "grad_norm": 3.5019690990448, "learning_rate": 1.926530920630124e-05, "loss": 0.2794, "step": 9728 }, { "epoch": 0.37716746628385783, "grad_norm": 4.682333946228027, "learning_rate": 1.9245967284953347e-05, "loss": 0.2736, "step": 9984 }, { "epoch": 0.3868384269578029, "grad_norm": 7.038910865783691, "learning_rate": 1.9226625363605455e-05, "loss": 0.2786, "step": 10240 }, { "epoch": 0.396509387631748, "grad_norm": 4.099844455718994, "learning_rate": 1.9207283442257567e-05, "loss": 0.2721, "step": 10496 }, { "epoch": 0.406180348305693, "grad_norm": 3.918942928314209, "learning_rate": 1.9187941520909676e-05, "loss": 0.2704, "step": 10752 }, { "epoch": 0.41585130897963807, "grad_norm": 5.411423683166504, "learning_rate": 1.9168599599561784e-05, "loss": 0.2604, "step": 11008 }, { "epoch": 0.42552226965358314, "grad_norm": 7.702000617980957, "learning_rate": 1.9149257678213896e-05, "loss": 0.2618, "step": 11264 }, { "epoch": 0.4351932303275282, "grad_norm": 4.103521347045898, "learning_rate": 1.912999131124627e-05, "loss": 0.2511, "step": 11520 }, { "epoch": 0.4448641910014733, "grad_norm": 6.066595077514648, "learning_rate": 1.911064938989838e-05, "loss": 0.2551, "step": 11776 }, { "epoch": 0.45453515167541836, "grad_norm": 6.25346565246582, "learning_rate": 1.9091307468550492e-05, "loss": 0.2499, "step": 12032 }, { "epoch": 0.46420611234936343, "grad_norm": 4.310111045837402, "learning_rate": 1.90719655472026e-05, "loss": 0.2403, "step": 12288 }, { "epoch": 0.4738770730233085, "grad_norm": 6.692182540893555, "learning_rate": 1.9052623625854712e-05, "loss": 0.24, "step": 12544 }, { "epoch": 0.4835480336972536, "grad_norm": 4.0325751304626465, "learning_rate": 1.903328170450682e-05, "loss": 0.2383, "step": 12800 }, { "epoch": 0.49321899437119865, "grad_norm": 4.97512149810791, "learning_rate": 1.901393978315893e-05, "loss": 0.2333, "step": 13056 }, { "epoch": 0.5028899550451438, "grad_norm": 4.146473407745361, "learning_rate": 1.899459786181104e-05, "loss": 0.2342, "step": 13312 }, { "epoch": 0.5125609157190888, "grad_norm": 6.477240562438965, "learning_rate": 1.897525594046315e-05, "loss": 0.23, "step": 13568 }, { "epoch": 0.5222318763930339, "grad_norm": 5.060920715332031, "learning_rate": 1.895591401911526e-05, "loss": 0.2317, "step": 13824 }, { "epoch": 0.5319028370669789, "grad_norm": 7.546407222747803, "learning_rate": 1.893657209776737e-05, "loss": 0.2235, "step": 14080 }, { "epoch": 0.5415737977409241, "grad_norm": 6.76540994644165, "learning_rate": 1.8917230176419478e-05, "loss": 0.2189, "step": 14336 }, { "epoch": 0.5512447584148691, "grad_norm": 6.734369277954102, "learning_rate": 1.889788825507159e-05, "loss": 0.2136, "step": 14592 }, { "epoch": 0.5609157190888142, "grad_norm": 4.962408065795898, "learning_rate": 1.8878546333723698e-05, "loss": 0.2116, "step": 14848 }, { "epoch": 0.5705866797627592, "grad_norm": 6.1386332511901855, "learning_rate": 1.8859279966756074e-05, "loss": 0.212, "step": 15104 }, { "epoch": 0.5802576404367044, "grad_norm": 6.326384544372559, "learning_rate": 1.8839938045408182e-05, "loss": 0.2074, "step": 15360 }, { "epoch": 0.5899286011106494, "grad_norm": 6.425912857055664, "learning_rate": 1.8820596124060294e-05, "loss": 0.2043, "step": 15616 }, { "epoch": 0.5995995617845945, "grad_norm": 5.6023945808410645, "learning_rate": 1.8801254202712406e-05, "loss": 0.205, "step": 15872 }, { "epoch": 0.6092705224585395, "grad_norm": 5.436675071716309, "learning_rate": 1.8781912281364514e-05, "loss": 0.2, "step": 16128 }, { "epoch": 0.6189414831324846, "grad_norm": 3.5356578826904297, "learning_rate": 1.8762570360016623e-05, "loss": 0.1957, "step": 16384 }, { "epoch": 0.6286124438064297, "grad_norm": 4.914231777191162, "learning_rate": 1.874322843866873e-05, "loss": 0.1957, "step": 16640 }, { "epoch": 0.6382834044803748, "grad_norm": 5.829110145568848, "learning_rate": 1.8723886517320843e-05, "loss": 0.1936, "step": 16896 }, { "epoch": 0.6479543651543198, "grad_norm": 9.775616645812988, "learning_rate": 1.8704544595972955e-05, "loss": 0.194, "step": 17152 }, { "epoch": 0.6576253258282649, "grad_norm": 3.6314735412597656, "learning_rate": 1.8685202674625063e-05, "loss": 0.1862, "step": 17408 }, { "epoch": 0.66729628650221, "grad_norm": 4.77644681930542, "learning_rate": 1.8665860753277172e-05, "loss": 0.1828, "step": 17664 }, { "epoch": 0.6769672471761551, "grad_norm": 7.319884300231934, "learning_rate": 1.8646518831929284e-05, "loss": 0.1879, "step": 17920 }, { "epoch": 0.6866382078501001, "grad_norm": 5.536057949066162, "learning_rate": 1.8627176910581392e-05, "loss": 0.1836, "step": 18176 }, { "epoch": 0.6963091685240452, "grad_norm": 5.481319904327393, "learning_rate": 1.86078349892335e-05, "loss": 0.1802, "step": 18432 }, { "epoch": 0.7059801291979902, "grad_norm": 6.011005401611328, "learning_rate": 1.8588493067885612e-05, "loss": 0.1824, "step": 18688 }, { "epoch": 0.7156510898719354, "grad_norm": 5.187521457672119, "learning_rate": 1.8569151146537724e-05, "loss": 0.1763, "step": 18944 }, { "epoch": 0.7253220505458804, "grad_norm": 6.904630661010742, "learning_rate": 1.8549884779570096e-05, "loss": 0.1719, "step": 19200 }, { "epoch": 0.7349930112198255, "grad_norm": 7.806436538696289, "learning_rate": 1.8530542858222208e-05, "loss": 0.1707, "step": 19456 }, { "epoch": 0.7446639718937705, "grad_norm": 3.597665548324585, "learning_rate": 1.8511200936874317e-05, "loss": 0.1706, "step": 19712 }, { "epoch": 0.7543349325677157, "grad_norm": 4.756324291229248, "learning_rate": 1.849185901552643e-05, "loss": 0.1689, "step": 19968 }, { "epoch": 0.7640058932416607, "grad_norm": 5.917934894561768, "learning_rate": 1.8472517094178537e-05, "loss": 0.1671, "step": 20224 }, { "epoch": 0.7736768539156058, "grad_norm": 7.077094554901123, "learning_rate": 1.8453175172830645e-05, "loss": 0.1655, "step": 20480 }, { "epoch": 0.7833478145895508, "grad_norm": 4.210581302642822, "learning_rate": 1.8433833251482757e-05, "loss": 0.1576, "step": 20736 }, { "epoch": 0.793018775263496, "grad_norm": 4.523745059967041, "learning_rate": 1.8414491330134866e-05, "loss": 0.1601, "step": 20992 }, { "epoch": 0.802689735937441, "grad_norm": 4.547347545623779, "learning_rate": 1.8395149408786977e-05, "loss": 0.1583, "step": 21248 }, { "epoch": 0.812360696611386, "grad_norm": 4.029369831085205, "learning_rate": 1.837588304181935e-05, "loss": 0.1576, "step": 21504 }, { "epoch": 0.8220316572853311, "grad_norm": 5.123602867126465, "learning_rate": 1.835654112047146e-05, "loss": 0.1514, "step": 21760 }, { "epoch": 0.8317026179592761, "grad_norm": 6.265158176422119, "learning_rate": 1.833719919912357e-05, "loss": 0.157, "step": 22016 }, { "epoch": 0.8413735786332213, "grad_norm": 8.210796356201172, "learning_rate": 1.8317857277775682e-05, "loss": 0.1503, "step": 22272 }, { "epoch": 0.8510445393071663, "grad_norm": 10.078185081481934, "learning_rate": 1.829851535642779e-05, "loss": 0.15, "step": 22528 }, { "epoch": 0.8607154999811114, "grad_norm": 6.486992359161377, "learning_rate": 1.82791734350799e-05, "loss": 0.1484, "step": 22784 }, { "epoch": 0.8703864606550564, "grad_norm": 6.386577129364014, "learning_rate": 1.825983151373201e-05, "loss": 0.1476, "step": 23040 }, { "epoch": 0.8800574213290016, "grad_norm": 7.5579328536987305, "learning_rate": 1.8240489592384122e-05, "loss": 0.1458, "step": 23296 }, { "epoch": 0.8897283820029466, "grad_norm": 3.283404588699341, "learning_rate": 1.8221223225416495e-05, "loss": 0.144, "step": 23552 }, { "epoch": 0.8993993426768917, "grad_norm": 7.462500095367432, "learning_rate": 1.8201881304068606e-05, "loss": 0.1475, "step": 23808 }, { "epoch": 0.9090703033508367, "grad_norm": 4.219975471496582, "learning_rate": 1.8182539382720715e-05, "loss": 0.1415, "step": 24064 }, { "epoch": 0.9187412640247818, "grad_norm": 5.009161949157715, "learning_rate": 1.8163197461372823e-05, "loss": 0.1395, "step": 24320 }, { "epoch": 0.9284122246987269, "grad_norm": 5.921178340911865, "learning_rate": 1.8143855540024935e-05, "loss": 0.1381, "step": 24576 }, { "epoch": 0.938083185372672, "grad_norm": 5.374543190002441, "learning_rate": 1.8124513618677044e-05, "loss": 0.1352, "step": 24832 }, { "epoch": 0.947754146046617, "grad_norm": 3.8733317852020264, "learning_rate": 1.8105171697329152e-05, "loss": 0.1346, "step": 25088 }, { "epoch": 0.9574251067205621, "grad_norm": 4.049707412719727, "learning_rate": 1.8085829775981264e-05, "loss": 0.1319, "step": 25344 }, { "epoch": 0.9670960673945072, "grad_norm": 7.517127990722656, "learning_rate": 1.806656340901364e-05, "loss": 0.1271, "step": 25600 }, { "epoch": 0.9767670280684523, "grad_norm": 4.687051773071289, "learning_rate": 1.8047221487665748e-05, "loss": 0.1279, "step": 25856 }, { "epoch": 0.9864379887423973, "grad_norm": 4.79626989364624, "learning_rate": 1.802787956631786e-05, "loss": 0.1307, "step": 26112 }, { "epoch": 0.9961089494163424, "grad_norm": 4.730831623077393, "learning_rate": 1.8008537644969968e-05, "loss": 0.1302, "step": 26368 }, { "epoch": 1.0, "eval_f1_B-DISEASE": 0.7781765393765878, "eval_f1_B-MEDICATION": 0.9037345902828137, "eval_f1_B-PROCEDURE": 0.7974038223998922, "eval_f1_B-SYMPTOM": 0.7777604788347432, "eval_f1_I-DISEASE": 0.8292819119234635, "eval_f1_I-MEDICATION": 0.9226245076350295, "eval_f1_I-PROCEDURE": 0.8287028141772023, "eval_f1_I-SYMPTOM": 0.7961978076970171, "eval_f1_O": 0.9184682391128007, "eval_f1_macro": 0.8391500790488389, "eval_f1_micro": 0.8767886171279684, "eval_loss": 0.26655662059783936, "eval_precision_B-DISEASE": 0.7914206036745407, "eval_precision_B-MEDICATION": 0.9209310918159985, "eval_precision_B-PROCEDURE": 0.839345657415725, "eval_precision_B-SYMPTOM": 0.7863714275719733, "eval_precision_I-DISEASE": 0.8204249990792915, "eval_precision_I-MEDICATION": 0.9416267415606586, "eval_precision_I-PROCEDURE": 0.8691026329930145, "eval_precision_I-SYMPTOM": 0.7889775161164393, "eval_precision_O": 0.9252825164715074, "eval_precision_macro": 0.8537203540776832, "eval_precision_micro": 0.8833806029697474, "eval_rauc_macro": 0.9021063420010184, "eval_rauc_micro": 0.9278253367848399, "eval_recall_B-DISEASE": 0.7653684461013722, "eval_recall_B-MEDICATION": 0.8871685353265706, "eval_recall_B-PROCEDURE": 0.7594541409993155, "eval_recall_B-SYMPTOM": 0.7693360711841205, "eval_recall_I-DISEASE": 0.838332141647537, "eval_recall_I-MEDICATION": 0.9043740413603427, "eval_recall_I-PROCEDURE": 0.7918920903816495, "eval_recall_I-SYMPTOM": 0.8035514721684934, "eval_recall_O": 0.9117535959899989, "eval_recall_macro": 0.8256922816843777, "eval_recall_micro": 0.8702942843552489, "eval_roc_auc_B-DISEASE": 0.8808075963149171, "eval_roc_auc_B-MEDICATION": 0.9434285957498447, "eval_roc_auc_B-PROCEDURE": 0.87847497845947, "eval_roc_auc_B-SYMPTOM": 0.8824080001390723, "eval_roc_auc_I-DISEASE": 0.9071844217170272, "eval_roc_auc_I-MEDICATION": 0.9517977149811767, "eval_roc_auc_I-PROCEDURE": 0.8901357064770181, "eval_roc_auc_I-SYMPTOM": 0.8850274935590098, "eval_roc_auc_O": 0.8996925706116287, "eval_runtime": 61.1466, "eval_samples_per_second": 177.818, "eval_steps_per_second": 22.242, "step": 26471 }, { "epoch": 1.0057799100902876, "grad_norm": 3.7280209064483643, "learning_rate": 1.798919572362208e-05, "loss": 0.1119, "step": 26624 }, { "epoch": 1.0154508707642325, "grad_norm": 5.684998512268066, "learning_rate": 1.796985380227419e-05, "loss": 0.0936, "step": 26880 }, { "epoch": 1.0251218314381776, "grad_norm": 3.695190191268921, "learning_rate": 1.7950511880926297e-05, "loss": 0.0955, "step": 27136 }, { "epoch": 1.0347927921121227, "grad_norm": 4.493898868560791, "learning_rate": 1.793116995957841e-05, "loss": 0.0922, "step": 27392 }, { "epoch": 1.0444637527860678, "grad_norm": 4.638331413269043, "learning_rate": 1.7911828038230517e-05, "loss": 0.0923, "step": 27648 }, { "epoch": 1.0541347134600128, "grad_norm": 4.798746585845947, "learning_rate": 1.7892561671262893e-05, "loss": 0.0918, "step": 27904 }, { "epoch": 1.0638056741339579, "grad_norm": 4.274206638336182, "learning_rate": 1.7873219749915e-05, "loss": 0.0945, "step": 28160 }, { "epoch": 1.073476634807903, "grad_norm": 3.235424280166626, "learning_rate": 1.7853877828567113e-05, "loss": 0.0919, "step": 28416 }, { "epoch": 1.0831475954818481, "grad_norm": 4.507290363311768, "learning_rate": 1.783453590721922e-05, "loss": 0.0932, "step": 28672 }, { "epoch": 1.092818556155793, "grad_norm": 10.315567016601562, "learning_rate": 1.7815193985871333e-05, "loss": 0.0952, "step": 28928 }, { "epoch": 1.1024895168297382, "grad_norm": 8.936885833740234, "learning_rate": 1.7795852064523442e-05, "loss": 0.0899, "step": 29184 }, { "epoch": 1.1121604775036833, "grad_norm": 2.79886531829834, "learning_rate": 1.777651014317555e-05, "loss": 0.0901, "step": 29440 }, { "epoch": 1.1218314381776284, "grad_norm": 3.2710986137390137, "learning_rate": 1.7757168221827662e-05, "loss": 0.0883, "step": 29696 }, { "epoch": 1.1315023988515733, "grad_norm": 4.917572498321533, "learning_rate": 1.7737826300479774e-05, "loss": 0.0896, "step": 29952 }, { "epoch": 1.1411733595255185, "grad_norm": 4.567966938018799, "learning_rate": 1.7718484379131882e-05, "loss": 0.0914, "step": 30208 }, { "epoch": 1.1508443201994636, "grad_norm": 3.6949679851531982, "learning_rate": 1.769914245778399e-05, "loss": 0.0871, "step": 30464 }, { "epoch": 1.1605152808734087, "grad_norm": 8.372673988342285, "learning_rate": 1.7679876090816366e-05, "loss": 0.0909, "step": 30720 }, { "epoch": 1.1701862415473536, "grad_norm": 3.8434600830078125, "learning_rate": 1.7660534169468478e-05, "loss": 0.0849, "step": 30976 }, { "epoch": 1.1798572022212988, "grad_norm": 4.115856647491455, "learning_rate": 1.7641192248120587e-05, "loss": 0.0904, "step": 31232 }, { "epoch": 1.1895281628952439, "grad_norm": 3.246572971343994, "learning_rate": 1.7621850326772695e-05, "loss": 0.0907, "step": 31488 }, { "epoch": 1.199199123569189, "grad_norm": 3.668151617050171, "learning_rate": 1.7602508405424807e-05, "loss": 0.0864, "step": 31744 }, { "epoch": 1.208870084243134, "grad_norm": 3.0340752601623535, "learning_rate": 1.7583166484076915e-05, "loss": 0.0848, "step": 32000 }, { "epoch": 1.218541044917079, "grad_norm": 4.029708385467529, "learning_rate": 1.7563824562729027e-05, "loss": 0.0857, "step": 32256 }, { "epoch": 1.2282120055910242, "grad_norm": 5.181060791015625, "learning_rate": 1.7544482641381136e-05, "loss": 0.0863, "step": 32512 }, { "epoch": 1.2378829662649693, "grad_norm": 5.2258124351501465, "learning_rate": 1.752521627441351e-05, "loss": 0.0867, "step": 32768 }, { "epoch": 1.2475539269389142, "grad_norm": 10.210968017578125, "learning_rate": 1.750587435306562e-05, "loss": 0.0878, "step": 33024 }, { "epoch": 1.2572248876128593, "grad_norm": 3.4898252487182617, "learning_rate": 1.748653243171773e-05, "loss": 0.0831, "step": 33280 }, { "epoch": 1.2668958482868045, "grad_norm": 4.27427864074707, "learning_rate": 1.746719051036984e-05, "loss": 0.0864, "step": 33536 }, { "epoch": 1.2765668089607494, "grad_norm": 5.286701679229736, "learning_rate": 1.744784858902195e-05, "loss": 0.0799, "step": 33792 }, { "epoch": 1.2862377696346945, "grad_norm": 3.2482128143310547, "learning_rate": 1.742850666767406e-05, "loss": 0.084, "step": 34048 }, { "epoch": 1.2959087303086396, "grad_norm": 2.9942526817321777, "learning_rate": 1.7409164746326172e-05, "loss": 0.0834, "step": 34304 }, { "epoch": 1.3055796909825848, "grad_norm": 9.361547470092773, "learning_rate": 1.738982282497828e-05, "loss": 0.0851, "step": 34560 }, { "epoch": 1.3152506516565299, "grad_norm": 5.259056568145752, "learning_rate": 1.737048090363039e-05, "loss": 0.0824, "step": 34816 }, { "epoch": 1.3249216123304748, "grad_norm": 4.652898788452148, "learning_rate": 1.7351138982282497e-05, "loss": 0.0807, "step": 35072 }, { "epoch": 1.33459257300442, "grad_norm": 3.225607395172119, "learning_rate": 1.733179706093461e-05, "loss": 0.0833, "step": 35328 }, { "epoch": 1.344263533678365, "grad_norm": 4.242973804473877, "learning_rate": 1.7312530693966985e-05, "loss": 0.0785, "step": 35584 }, { "epoch": 1.35393449435231, "grad_norm": 3.6310012340545654, "learning_rate": 1.7293188772619093e-05, "loss": 0.082, "step": 35840 }, { "epoch": 1.363605455026255, "grad_norm": 4.149777412414551, "learning_rate": 1.72738468512712e-05, "loss": 0.0788, "step": 36096 }, { "epoch": 1.3732764157002002, "grad_norm": 3.4874989986419678, "learning_rate": 1.7254504929923313e-05, "loss": 0.0813, "step": 36352 }, { "epoch": 1.3829473763741453, "grad_norm": 4.847866535186768, "learning_rate": 1.7235163008575425e-05, "loss": 0.0762, "step": 36608 }, { "epoch": 1.3926183370480905, "grad_norm": 6.0248332023620605, "learning_rate": 1.7215821087227534e-05, "loss": 0.084, "step": 36864 }, { "epoch": 1.4022892977220354, "grad_norm": 12.932506561279297, "learning_rate": 1.7196479165879642e-05, "loss": 0.0774, "step": 37120 }, { "epoch": 1.4119602583959805, "grad_norm": 4.421403884887695, "learning_rate": 1.7177137244531754e-05, "loss": 0.0745, "step": 37376 }, { "epoch": 1.4216312190699256, "grad_norm": 4.352053165435791, "learning_rate": 1.7157795323183862e-05, "loss": 0.0766, "step": 37632 }, { "epoch": 1.4313021797438705, "grad_norm": 3.0803287029266357, "learning_rate": 1.7138528956216238e-05, "loss": 0.0786, "step": 37888 }, { "epoch": 1.4409731404178157, "grad_norm": 9.046032905578613, "learning_rate": 1.7119187034868347e-05, "loss": 0.0792, "step": 38144 }, { "epoch": 1.4506441010917608, "grad_norm": 5.227222442626953, "learning_rate": 1.709984511352046e-05, "loss": 0.0782, "step": 38400 }, { "epoch": 1.460315061765706, "grad_norm": 9.277040481567383, "learning_rate": 1.7080503192172567e-05, "loss": 0.0734, "step": 38656 }, { "epoch": 1.469986022439651, "grad_norm": 6.931709289550781, "learning_rate": 1.706116127082468e-05, "loss": 0.0745, "step": 38912 }, { "epoch": 1.479656983113596, "grad_norm": 2.403529167175293, "learning_rate": 1.7041819349476787e-05, "loss": 0.0713, "step": 39168 }, { "epoch": 1.489327943787541, "grad_norm": 3.0608630180358887, "learning_rate": 1.7022477428128895e-05, "loss": 0.0724, "step": 39424 }, { "epoch": 1.4989989044614862, "grad_norm": 2.9378268718719482, "learning_rate": 1.7003135506781007e-05, "loss": 0.0719, "step": 39680 }, { "epoch": 1.508669865135431, "grad_norm": 4.745122909545898, "learning_rate": 1.6983869139813383e-05, "loss": 0.0755, "step": 39936 }, { "epoch": 1.5183408258093762, "grad_norm": 7.573899745941162, "learning_rate": 1.696452721846549e-05, "loss": 0.0731, "step": 40192 }, { "epoch": 1.5280117864833214, "grad_norm": 10.226018905639648, "learning_rate": 1.69451852971176e-05, "loss": 0.076, "step": 40448 }, { "epoch": 1.5376827471572665, "grad_norm": 5.051877021789551, "learning_rate": 1.692584337576971e-05, "loss": 0.075, "step": 40704 }, { "epoch": 1.5473537078312116, "grad_norm": 4.395775318145752, "learning_rate": 1.6906501454421823e-05, "loss": 0.0735, "step": 40960 }, { "epoch": 1.5570246685051568, "grad_norm": 4.498766899108887, "learning_rate": 1.6887159533073932e-05, "loss": 0.0729, "step": 41216 }, { "epoch": 1.5666956291791017, "grad_norm": 5.933803558349609, "learning_rate": 1.686781761172604e-05, "loss": 0.0748, "step": 41472 }, { "epoch": 1.5763665898530468, "grad_norm": 2.9409236907958984, "learning_rate": 1.6848475690378152e-05, "loss": 0.07, "step": 41728 }, { "epoch": 1.5860375505269917, "grad_norm": 8.31312370300293, "learning_rate": 1.6829209323410528e-05, "loss": 0.0703, "step": 41984 }, { "epoch": 1.5957085112009368, "grad_norm": 1.95456862449646, "learning_rate": 1.6809867402062636e-05, "loss": 0.0709, "step": 42240 }, { "epoch": 1.605379471874882, "grad_norm": 3.6376004219055176, "learning_rate": 1.6790525480714745e-05, "loss": 0.0656, "step": 42496 }, { "epoch": 1.615050432548827, "grad_norm": 3.3740224838256836, "learning_rate": 1.6771183559366853e-05, "loss": 0.0728, "step": 42752 }, { "epoch": 1.6247213932227722, "grad_norm": 2.608504295349121, "learning_rate": 1.6751841638018965e-05, "loss": 0.0723, "step": 43008 }, { "epoch": 1.634392353896717, "grad_norm": 5.380160808563232, "learning_rate": 1.6732499716671077e-05, "loss": 0.0686, "step": 43264 }, { "epoch": 1.6440633145706622, "grad_norm": 1.6728038787841797, "learning_rate": 1.6713157795323185e-05, "loss": 0.0668, "step": 43520 }, { "epoch": 1.6537342752446071, "grad_norm": 7.20682430267334, "learning_rate": 1.6693815873975294e-05, "loss": 0.0689, "step": 43776 }, { "epoch": 1.6634052359185523, "grad_norm": 9.442398071289062, "learning_rate": 1.667454950700767e-05, "loss": 0.0669, "step": 44032 }, { "epoch": 1.6730761965924974, "grad_norm": 3.7477312088012695, "learning_rate": 1.665520758565978e-05, "loss": 0.0647, "step": 44288 }, { "epoch": 1.6827471572664425, "grad_norm": 4.700344085693359, "learning_rate": 1.663586566431189e-05, "loss": 0.0712, "step": 44544 }, { "epoch": 1.6924181179403877, "grad_norm": 3.5288517475128174, "learning_rate": 1.6616523742963998e-05, "loss": 0.0678, "step": 44800 }, { "epoch": 1.7020890786143328, "grad_norm": 7.689276695251465, "learning_rate": 1.659718182161611e-05, "loss": 0.0668, "step": 45056 }, { "epoch": 1.7117600392882777, "grad_norm": 4.36802339553833, "learning_rate": 1.657783990026822e-05, "loss": 0.0625, "step": 45312 }, { "epoch": 1.7214309999622228, "grad_norm": 2.3436222076416016, "learning_rate": 1.6558573533300594e-05, "loss": 0.0655, "step": 45568 }, { "epoch": 1.7311019606361677, "grad_norm": 1.5666533708572388, "learning_rate": 1.6539231611952706e-05, "loss": 0.0644, "step": 45824 }, { "epoch": 1.7407729213101129, "grad_norm": 4.3904266357421875, "learning_rate": 1.6519889690604814e-05, "loss": 0.0634, "step": 46080 }, { "epoch": 1.750443881984058, "grad_norm": 2.4941790103912354, "learning_rate": 1.6500547769256926e-05, "loss": 0.0641, "step": 46336 }, { "epoch": 1.760114842658003, "grad_norm": 3.0806963443756104, "learning_rate": 1.6481205847909034e-05, "loss": 0.0664, "step": 46592 }, { "epoch": 1.7697858033319482, "grad_norm": 6.179355621337891, "learning_rate": 1.6461863926561143e-05, "loss": 0.0626, "step": 46848 }, { "epoch": 1.7794567640058934, "grad_norm": 1.792417287826538, "learning_rate": 1.644252200521325e-05, "loss": 0.0623, "step": 47104 }, { "epoch": 1.7891277246798383, "grad_norm": 3.0433876514434814, "learning_rate": 1.6423180083865363e-05, "loss": 0.0629, "step": 47360 }, { "epoch": 1.7987986853537834, "grad_norm": 8.955931663513184, "learning_rate": 1.6403838162517475e-05, "loss": 0.064, "step": 47616 }, { "epoch": 1.8084696460277283, "grad_norm": 1.9222790002822876, "learning_rate": 1.6384496241169583e-05, "loss": 0.062, "step": 47872 }, { "epoch": 1.8181406067016734, "grad_norm": 6.602641582489014, "learning_rate": 1.6365154319821692e-05, "loss": 0.0625, "step": 48128 }, { "epoch": 1.8278115673756186, "grad_norm": 3.8623206615448, "learning_rate": 1.6345812398473804e-05, "loss": 0.0641, "step": 48384 }, { "epoch": 1.8374825280495637, "grad_norm": 3.5689499378204346, "learning_rate": 1.6326470477125912e-05, "loss": 0.061, "step": 48640 }, { "epoch": 1.8471534887235088, "grad_norm": 5.1966705322265625, "learning_rate": 1.6307128555778024e-05, "loss": 0.0603, "step": 48896 }, { "epoch": 1.856824449397454, "grad_norm": 8.878084182739258, "learning_rate": 1.6287862188810396e-05, "loss": 0.0609, "step": 49152 }, { "epoch": 1.8664954100713989, "grad_norm": 8.031649589538574, "learning_rate": 1.6268520267462508e-05, "loss": 0.0609, "step": 49408 }, { "epoch": 1.876166370745344, "grad_norm": 5.966855525970459, "learning_rate": 1.6249178346114616e-05, "loss": 0.0603, "step": 49664 }, { "epoch": 1.8858373314192889, "grad_norm": 5.956678867340088, "learning_rate": 1.622983642476673e-05, "loss": 0.0571, "step": 49920 }, { "epoch": 1.895508292093234, "grad_norm": 4.6985650062561035, "learning_rate": 1.6210494503418837e-05, "loss": 0.0608, "step": 50176 }, { "epoch": 1.9051792527671791, "grad_norm": 2.0274322032928467, "learning_rate": 1.6191152582070945e-05, "loss": 0.0601, "step": 50432 }, { "epoch": 1.9148502134411243, "grad_norm": 2.5451152324676514, "learning_rate": 1.6171810660723057e-05, "loss": 0.0585, "step": 50688 }, { "epoch": 1.9245211741150694, "grad_norm": 10.576590538024902, "learning_rate": 1.6152468739375165e-05, "loss": 0.0604, "step": 50944 }, { "epoch": 1.9341921347890145, "grad_norm": 2.8857650756835938, "learning_rate": 1.6133126818027277e-05, "loss": 0.0577, "step": 51200 }, { "epoch": 1.9438630954629594, "grad_norm": 3.5323078632354736, "learning_rate": 1.6113784896679386e-05, "loss": 0.0593, "step": 51456 }, { "epoch": 1.9535340561369046, "grad_norm": 1.838157057762146, "learning_rate": 1.6094442975331498e-05, "loss": 0.0607, "step": 51712 }, { "epoch": 1.9632050168108495, "grad_norm": 3.0781214237213135, "learning_rate": 1.6075101053983606e-05, "loss": 0.0579, "step": 51968 }, { "epoch": 1.9728759774847946, "grad_norm": 3.3382725715637207, "learning_rate": 1.6055759132635714e-05, "loss": 0.0583, "step": 52224 }, { "epoch": 1.9825469381587397, "grad_norm": 11.693552017211914, "learning_rate": 1.6036417211287826e-05, "loss": 0.06, "step": 52480 }, { "epoch": 1.9922178988326849, "grad_norm": 2.506922960281372, "learning_rate": 1.6017075289939935e-05, "loss": 0.0565, "step": 52736 }, { "epoch": 2.0, "eval_f1_B-DISEASE": 0.795667459204617, "eval_f1_B-MEDICATION": 0.921751878321422, "eval_f1_B-PROCEDURE": 0.8160781701088163, "eval_f1_B-SYMPTOM": 0.7897939991838038, "eval_f1_I-DISEASE": 0.840603319768283, "eval_f1_I-MEDICATION": 0.9362919771083036, "eval_f1_I-PROCEDURE": 0.8357941949344769, "eval_f1_I-SYMPTOM": 0.7996366525364875, "eval_f1_O": 0.9230678952157123, "eval_f1_macro": 0.8509650607091025, "eval_f1_micro": 0.8840404124180135, "eval_loss": 0.31923601031303406, "eval_precision_B-DISEASE": 0.8158783079808294, "eval_precision_B-MEDICATION": 0.9499527856468366, "eval_precision_B-PROCEDURE": 0.848526831070472, "eval_precision_B-SYMPTOM": 0.8200744316297579, "eval_precision_I-DISEASE": 0.8429946510571731, "eval_precision_I-MEDICATION": 0.9561669515355351, "eval_precision_I-PROCEDURE": 0.8788058788058788, "eval_precision_I-SYMPTOM": 0.8278156741398548, "eval_precision_O": 0.9126516403530364, "eval_precision_macro": 0.8725407946910416, "eval_precision_micro": 0.8876748775758254, "eval_rauc_macro": 0.904220184377927, "eval_rauc_micro": 0.9331179355327206, "eval_recall_B-DISEASE": 0.7764337272943602, "eval_recall_B-MEDICATION": 0.8951770777718455, "eval_recall_B-PROCEDURE": 0.7860198494182067, "eval_recall_B-SYMPTOM": 0.7616700889801505, "eval_recall_I-DISEASE": 0.8382255171289154, "eval_recall_I-MEDICATION": 0.9172264240757392, "eval_recall_I-PROCEDURE": 0.7967963312035382, "eval_recall_I-SYMPTOM": 0.7733129163980228, "eval_recall_O": 0.9337246601871894, "eval_recall_macro": 0.8309540658286632, "eval_recall_micro": 0.8804355875575709, "eval_roc_auc_B-DISEASE": 0.8865867148232266, "eval_roc_auc_B-MEDICATION": 0.9474921532385823, "eval_roc_auc_B-PROCEDURE": 0.8918013132047432, "eval_roc_auc_B-SYMPTOM": 0.8790279783173326, "eval_roc_auc_I-DISEASE": 0.9089187734702582, "eval_roc_auc_I-MEDICATION": 0.9583212327636237, "eval_roc_auc_I-PROCEDURE": 0.8930449895535946, "eval_roc_auc_I-SYMPTOM": 0.8741219681493123, "eval_roc_auc_O": 0.8986665358806698, "eval_runtime": 61.4999, "eval_samples_per_second": 176.797, "eval_steps_per_second": 22.114, "step": 52942 }, { "epoch": 2.00188885950663, "grad_norm": 6.549783706665039, "learning_rate": 1.5997733368592046e-05, "loss": 0.0527, "step": 52992 }, { "epoch": 2.011559820180575, "grad_norm": 17.193124771118164, "learning_rate": 1.597846700162442e-05, "loss": 0.0401, "step": 53248 }, { "epoch": 2.0212307808545202, "grad_norm": 9.684758186340332, "learning_rate": 1.595912508027653e-05, "loss": 0.0405, "step": 53504 }, { "epoch": 2.030901741528465, "grad_norm": 2.453227996826172, "learning_rate": 1.5939783158928642e-05, "loss": 0.0412, "step": 53760 }, { "epoch": 2.04057270220241, "grad_norm": 5.099764347076416, "learning_rate": 1.592044123758075e-05, "loss": 0.0392, "step": 54016 }, { "epoch": 2.050243662876355, "grad_norm": 2.8053245544433594, "learning_rate": 1.590109931623286e-05, "loss": 0.0388, "step": 54272 }, { "epoch": 2.0599146235503003, "grad_norm": 1.1759517192840576, "learning_rate": 1.5881832949265235e-05, "loss": 0.0411, "step": 54528 }, { "epoch": 2.0695855842242454, "grad_norm": 5.915517330169678, "learning_rate": 1.5862491027917343e-05, "loss": 0.0389, "step": 54784 }, { "epoch": 2.0792565448981906, "grad_norm": 8.0426664352417, "learning_rate": 1.5843149106569455e-05, "loss": 0.0382, "step": 55040 }, { "epoch": 2.0889275055721357, "grad_norm": 10.392659187316895, "learning_rate": 1.5823807185221564e-05, "loss": 0.0421, "step": 55296 }, { "epoch": 2.098598466246081, "grad_norm": 7.73749303817749, "learning_rate": 1.5804465263873675e-05, "loss": 0.0389, "step": 55552 }, { "epoch": 2.1082694269200255, "grad_norm": 5.712843418121338, "learning_rate": 1.5785123342525784e-05, "loss": 0.0403, "step": 55808 }, { "epoch": 2.1179403875939706, "grad_norm": 2.4857349395751953, "learning_rate": 1.5765781421177896e-05, "loss": 0.0389, "step": 56064 }, { "epoch": 2.1276113482679158, "grad_norm": 2.2378458976745605, "learning_rate": 1.5746439499830004e-05, "loss": 0.0395, "step": 56320 }, { "epoch": 2.137282308941861, "grad_norm": 5.0309739112854, "learning_rate": 1.5727097578482113e-05, "loss": 0.0407, "step": 56576 }, { "epoch": 2.146953269615806, "grad_norm": 3.6221115589141846, "learning_rate": 1.570775565713422e-05, "loss": 0.0413, "step": 56832 }, { "epoch": 2.156624230289751, "grad_norm": 4.810079574584961, "learning_rate": 1.5688413735786333e-05, "loss": 0.04, "step": 57088 }, { "epoch": 2.1662951909636963, "grad_norm": 2.51513671875, "learning_rate": 1.5669071814438445e-05, "loss": 0.0404, "step": 57344 }, { "epoch": 2.1759661516376414, "grad_norm": 2.1976306438446045, "learning_rate": 1.5649805447470817e-05, "loss": 0.0418, "step": 57600 }, { "epoch": 2.185637112311586, "grad_norm": 2.1660706996917725, "learning_rate": 1.563046352612293e-05, "loss": 0.0379, "step": 57856 }, { "epoch": 2.195308072985531, "grad_norm": 3.0370383262634277, "learning_rate": 1.561112160477504e-05, "loss": 0.0386, "step": 58112 }, { "epoch": 2.2049790336594763, "grad_norm": 2.958423614501953, "learning_rate": 1.5591855237807413e-05, "loss": 0.0395, "step": 58368 }, { "epoch": 2.2146499943334215, "grad_norm": 0.8426064848899841, "learning_rate": 1.5572513316459525e-05, "loss": 0.0396, "step": 58624 }, { "epoch": 2.2243209550073666, "grad_norm": 2.25600528717041, "learning_rate": 1.5553171395111633e-05, "loss": 0.0405, "step": 58880 }, { "epoch": 2.2339919156813117, "grad_norm": 2.133103609085083, "learning_rate": 1.553382947376374e-05, "loss": 0.0391, "step": 59136 }, { "epoch": 2.243662876355257, "grad_norm": 27.295085906982422, "learning_rate": 1.5514487552415853e-05, "loss": 0.0382, "step": 59392 }, { "epoch": 2.253333837029202, "grad_norm": 3.368842840194702, "learning_rate": 1.5495145631067962e-05, "loss": 0.0407, "step": 59648 }, { "epoch": 2.2630047977031467, "grad_norm": 1.4239710569381714, "learning_rate": 1.5475803709720074e-05, "loss": 0.0376, "step": 59904 }, { "epoch": 2.272675758377092, "grad_norm": 4.708951473236084, "learning_rate": 1.5456461788372182e-05, "loss": 0.0378, "step": 60160 }, { "epoch": 2.282346719051037, "grad_norm": 11.830906867980957, "learning_rate": 1.5437119867024294e-05, "loss": 0.0391, "step": 60416 }, { "epoch": 2.292017679724982, "grad_norm": 2.8490591049194336, "learning_rate": 1.5417777945676402e-05, "loss": 0.0395, "step": 60672 }, { "epoch": 2.301688640398927, "grad_norm": 3.398808240890503, "learning_rate": 1.539843602432851e-05, "loss": 0.0407, "step": 60928 }, { "epoch": 2.3113596010728723, "grad_norm": 4.087090492248535, "learning_rate": 1.537909410298062e-05, "loss": 0.0361, "step": 61184 }, { "epoch": 2.3210305617468174, "grad_norm": 2.446629762649536, "learning_rate": 1.535975218163273e-05, "loss": 0.0361, "step": 61440 }, { "epoch": 2.330701522420762, "grad_norm": 1.8066984415054321, "learning_rate": 1.5340410260284843e-05, "loss": 0.0399, "step": 61696 }, { "epoch": 2.3403724830947072, "grad_norm": 2.6665291786193848, "learning_rate": 1.532106833893695e-05, "loss": 0.0396, "step": 61952 }, { "epoch": 2.3500434437686524, "grad_norm": 0.9438909292221069, "learning_rate": 1.530172641758906e-05, "loss": 0.0385, "step": 62208 }, { "epoch": 2.3597144044425975, "grad_norm": 1.689215898513794, "learning_rate": 1.528238449624117e-05, "loss": 0.0399, "step": 62464 }, { "epoch": 2.3693853651165426, "grad_norm": 2.397761583328247, "learning_rate": 1.526304257489328e-05, "loss": 0.0363, "step": 62720 }, { "epoch": 2.3790563257904878, "grad_norm": 17.148651123046875, "learning_rate": 1.524370065354539e-05, "loss": 0.0379, "step": 62976 }, { "epoch": 2.388727286464433, "grad_norm": 7.684645652770996, "learning_rate": 1.52243587321975e-05, "loss": 0.038, "step": 63232 }, { "epoch": 2.398398247138378, "grad_norm": 2.3121824264526367, "learning_rate": 1.520501681084961e-05, "loss": 0.0363, "step": 63488 }, { "epoch": 2.408069207812323, "grad_norm": 4.012951374053955, "learning_rate": 1.518567488950172e-05, "loss": 0.0369, "step": 63744 }, { "epoch": 2.417740168486268, "grad_norm": 3.992743730545044, "learning_rate": 1.5166332968153829e-05, "loss": 0.037, "step": 64000 }, { "epoch": 2.427411129160213, "grad_norm": 2.0966529846191406, "learning_rate": 1.5147066601186205e-05, "loss": 0.0398, "step": 64256 }, { "epoch": 2.437082089834158, "grad_norm": 4.283209800720215, "learning_rate": 1.5127724679838316e-05, "loss": 0.0375, "step": 64512 }, { "epoch": 2.446753050508103, "grad_norm": 7.317829608917236, "learning_rate": 1.5108382758490425e-05, "loss": 0.0385, "step": 64768 }, { "epoch": 2.4564240111820483, "grad_norm": 2.1492598056793213, "learning_rate": 1.5089040837142535e-05, "loss": 0.038, "step": 65024 }, { "epoch": 2.4660949718559935, "grad_norm": 4.824232578277588, "learning_rate": 1.5069698915794643e-05, "loss": 0.0404, "step": 65280 }, { "epoch": 2.4757659325299386, "grad_norm": 2.757894992828369, "learning_rate": 1.5050356994446755e-05, "loss": 0.0379, "step": 65536 }, { "epoch": 2.4854368932038833, "grad_norm": 3.4582314491271973, "learning_rate": 1.5031015073098865e-05, "loss": 0.0352, "step": 65792 }, { "epoch": 2.4951078538778284, "grad_norm": 2.4710917472839355, "learning_rate": 1.5011673151750974e-05, "loss": 0.0363, "step": 66048 }, { "epoch": 2.5047788145517735, "grad_norm": 2.776700973510742, "learning_rate": 1.4992331230403084e-05, "loss": 0.0358, "step": 66304 }, { "epoch": 2.5144497752257187, "grad_norm": 3.758176326751709, "learning_rate": 1.4972989309055194e-05, "loss": 0.0356, "step": 66560 }, { "epoch": 2.524120735899664, "grad_norm": 2.5761642456054688, "learning_rate": 1.4953647387707304e-05, "loss": 0.0404, "step": 66816 }, { "epoch": 2.533791696573609, "grad_norm": 1.3668540716171265, "learning_rate": 1.4934381020739678e-05, "loss": 0.0366, "step": 67072 }, { "epoch": 2.543462657247554, "grad_norm": 3.8430099487304688, "learning_rate": 1.4915039099391788e-05, "loss": 0.0372, "step": 67328 }, { "epoch": 2.5531336179214987, "grad_norm": 5.29500675201416, "learning_rate": 1.4895697178043897e-05, "loss": 0.0358, "step": 67584 }, { "epoch": 2.5628045785954443, "grad_norm": 0.8562812805175781, "learning_rate": 1.4876355256696009e-05, "loss": 0.0374, "step": 67840 }, { "epoch": 2.572475539269389, "grad_norm": 4.466825008392334, "learning_rate": 1.4857013335348119e-05, "loss": 0.0359, "step": 68096 }, { "epoch": 2.582146499943334, "grad_norm": 12.638843536376953, "learning_rate": 1.4837746968380493e-05, "loss": 0.0362, "step": 68352 }, { "epoch": 2.5918174606172792, "grad_norm": 4.4277119636535645, "learning_rate": 1.4818405047032603e-05, "loss": 0.036, "step": 68608 }, { "epoch": 2.6014884212912244, "grad_norm": 1.1809728145599365, "learning_rate": 1.4799063125684711e-05, "loss": 0.035, "step": 68864 }, { "epoch": 2.6111593819651695, "grad_norm": 4.5768327713012695, "learning_rate": 1.4779721204336823e-05, "loss": 0.0374, "step": 69120 }, { "epoch": 2.6208303426391146, "grad_norm": 4.537430763244629, "learning_rate": 1.4760379282988933e-05, "loss": 0.0344, "step": 69376 }, { "epoch": 2.6305013033130598, "grad_norm": 1.4762442111968994, "learning_rate": 1.4741037361641042e-05, "loss": 0.0374, "step": 69632 }, { "epoch": 2.6401722639870044, "grad_norm": 1.5577633380889893, "learning_rate": 1.4721695440293152e-05, "loss": 0.0353, "step": 69888 }, { "epoch": 2.6498432246609496, "grad_norm": 4.222722053527832, "learning_rate": 1.4702353518945262e-05, "loss": 0.0359, "step": 70144 }, { "epoch": 2.6595141853348947, "grad_norm": 1.9563344717025757, "learning_rate": 1.4683011597597372e-05, "loss": 0.0379, "step": 70400 }, { "epoch": 2.66918514600884, "grad_norm": 5.85068416595459, "learning_rate": 1.466366967624948e-05, "loss": 0.0344, "step": 70656 }, { "epoch": 2.678856106682785, "grad_norm": 2.2116239070892334, "learning_rate": 1.464432775490159e-05, "loss": 0.039, "step": 70912 }, { "epoch": 2.68852706735673, "grad_norm": 4.683871269226074, "learning_rate": 1.4624985833553702e-05, "loss": 0.0343, "step": 71168 }, { "epoch": 2.698198028030675, "grad_norm": 1.9998408555984497, "learning_rate": 1.460564391220581e-05, "loss": 0.0367, "step": 71424 }, { "epoch": 2.70786898870462, "grad_norm": 1.950804352760315, "learning_rate": 1.4586301990857921e-05, "loss": 0.0354, "step": 71680 }, { "epoch": 2.7175399493785655, "grad_norm": 2.9149844646453857, "learning_rate": 1.456696006951003e-05, "loss": 0.0339, "step": 71936 }, { "epoch": 2.72721091005251, "grad_norm": 4.158403396606445, "learning_rate": 1.4547618148162141e-05, "loss": 0.0358, "step": 72192 }, { "epoch": 2.7368818707264553, "grad_norm": 1.9110437631607056, "learning_rate": 1.4528351781194517e-05, "loss": 0.0363, "step": 72448 }, { "epoch": 2.7465528314004004, "grad_norm": 4.942687034606934, "learning_rate": 1.4509009859846625e-05, "loss": 0.0331, "step": 72704 }, { "epoch": 2.7562237920743455, "grad_norm": 4.669269561767578, "learning_rate": 1.4489667938498735e-05, "loss": 0.0339, "step": 72960 }, { "epoch": 2.7658947527482907, "grad_norm": 2.782804012298584, "learning_rate": 1.4470326017150846e-05, "loss": 0.0337, "step": 73216 }, { "epoch": 2.775565713422236, "grad_norm": 0.8589828014373779, "learning_rate": 1.4450984095802956e-05, "loss": 0.0348, "step": 73472 }, { "epoch": 2.785236674096181, "grad_norm": 6.355395793914795, "learning_rate": 1.4431642174455066e-05, "loss": 0.0347, "step": 73728 }, { "epoch": 2.7949076347701256, "grad_norm": 1.7806596755981445, "learning_rate": 1.4412300253107174e-05, "loss": 0.035, "step": 73984 }, { "epoch": 2.8045785954440707, "grad_norm": 5.5398850440979, "learning_rate": 1.4392958331759286e-05, "loss": 0.0309, "step": 74240 }, { "epoch": 2.814249556118016, "grad_norm": 1.2205835580825806, "learning_rate": 1.437369196479166e-05, "loss": 0.0318, "step": 74496 }, { "epoch": 2.823920516791961, "grad_norm": 4.248105525970459, "learning_rate": 1.435435004344377e-05, "loss": 0.0347, "step": 74752 }, { "epoch": 2.833591477465906, "grad_norm": 1.5058479309082031, "learning_rate": 1.4335083676476144e-05, "loss": 0.0338, "step": 75008 }, { "epoch": 2.8432624381398512, "grad_norm": 3.8759660720825195, "learning_rate": 1.4315741755128254e-05, "loss": 0.0343, "step": 75264 }, { "epoch": 2.8529333988137964, "grad_norm": 16.488771438598633, "learning_rate": 1.4296399833780366e-05, "loss": 0.0338, "step": 75520 }, { "epoch": 2.862604359487741, "grad_norm": 6.564029693603516, "learning_rate": 1.4277057912432475e-05, "loss": 0.0312, "step": 75776 }, { "epoch": 2.8722753201616866, "grad_norm": 1.345203161239624, "learning_rate": 1.4257715991084585e-05, "loss": 0.0332, "step": 76032 }, { "epoch": 2.8819462808356313, "grad_norm": 2.0033822059631348, "learning_rate": 1.4238374069736693e-05, "loss": 0.0322, "step": 76288 }, { "epoch": 2.8916172415095764, "grad_norm": 6.844017505645752, "learning_rate": 1.4219032148388805e-05, "loss": 0.0319, "step": 76544 }, { "epoch": 2.9012882021835216, "grad_norm": 4.2425150871276855, "learning_rate": 1.4199690227040915e-05, "loss": 0.0322, "step": 76800 }, { "epoch": 2.9109591628574667, "grad_norm": 1.8265749216079712, "learning_rate": 1.4180348305693024e-05, "loss": 0.0322, "step": 77056 }, { "epoch": 2.920630123531412, "grad_norm": 3.0552210807800293, "learning_rate": 1.4161006384345134e-05, "loss": 0.033, "step": 77312 }, { "epoch": 2.930301084205357, "grad_norm": 2.102796792984009, "learning_rate": 1.4141664462997244e-05, "loss": 0.0346, "step": 77568 }, { "epoch": 2.939972044879302, "grad_norm": 1.903757929801941, "learning_rate": 1.4122322541649354e-05, "loss": 0.0315, "step": 77824 }, { "epoch": 2.9496430055532468, "grad_norm": 9.322936058044434, "learning_rate": 1.4102980620301462e-05, "loss": 0.0336, "step": 78080 }, { "epoch": 2.959313966227192, "grad_norm": 1.862209677696228, "learning_rate": 1.4083714253333838e-05, "loss": 0.0328, "step": 78336 }, { "epoch": 2.968984926901137, "grad_norm": 3.4806630611419678, "learning_rate": 1.4064372331985946e-05, "loss": 0.0321, "step": 78592 }, { "epoch": 2.978655887575082, "grad_norm": 7.490905284881592, "learning_rate": 1.4045030410638058e-05, "loss": 0.0321, "step": 78848 }, { "epoch": 2.9883268482490273, "grad_norm": 2.008312940597534, "learning_rate": 1.4025688489290168e-05, "loss": 0.032, "step": 79104 }, { "epoch": 2.9979978089229724, "grad_norm": 5.4629645347595215, "learning_rate": 1.4006346567942277e-05, "loss": 0.0316, "step": 79360 }, { "epoch": 3.0, "eval_f1_B-DISEASE": 0.8058758050409478, "eval_f1_B-MEDICATION": 0.9331298806211291, "eval_f1_B-PROCEDURE": 0.8246479186103336, "eval_f1_B-SYMPTOM": 0.7958159437899519, "eval_f1_I-DISEASE": 0.8446797498113607, "eval_f1_I-MEDICATION": 0.9445428820775924, "eval_f1_I-PROCEDURE": 0.8431497703012539, "eval_f1_I-SYMPTOM": 0.8064467139982535, "eval_f1_O": 0.9242880224344192, "eval_f1_macro": 0.8580640762983602, "eval_f1_micro": 0.8867284724320147, "eval_loss": 0.346194326877594, "eval_precision_B-DISEASE": 0.8078026619909142, "eval_precision_B-MEDICATION": 0.9413256066642521, "eval_precision_B-PROCEDURE": 0.8576841327049256, "eval_precision_B-SYMPTOM": 0.8196756992055719, "eval_precision_I-DISEASE": 0.8398561507936508, "eval_precision_I-MEDICATION": 0.9445678620543743, "eval_precision_I-PROCEDURE": 0.8685191753683246, "eval_precision_I-SYMPTOM": 0.8141611818689563, "eval_precision_O": 0.922143895574372, "eval_precision_macro": 0.8684151518028158, "eval_precision_micro": 0.8893727212165927, "eval_rauc_macro": 0.9135258778318731, "eval_rauc_micro": 0.9350417116157864, "eval_recall_B-DISEASE": 0.8039581185055922, "eval_recall_B-MEDICATION": 0.9250756362342054, "eval_recall_B-PROCEDURE": 0.7940622861054073, "eval_recall_B-SYMPTOM": 0.773305954825462, "eval_recall_I-DISEASE": 0.8495590762553469, "eval_recall_I-MEDICATION": 0.944517903422013, "eval_recall_I-PROCEDURE": 0.8192203807518748, "eval_recall_I-SYMPTOM": 0.7988770685579196, "eval_recall_O": 0.9264421433839181, "eval_recall_macro": 0.8483353964490822, "eval_recall_micro": 0.8840999005950059, "eval_roc_auc_B-DISEASE": 0.9001994739703317, "eval_roc_auc_B-MEDICATION": 0.9624199730237505, "eval_roc_auc_B-PROCEDURE": 0.89589622737178, "eval_roc_auc_B-SYMPTOM": 0.8848133448211472, "eval_roc_auc_I-DISEASE": 0.914201809349682, "eval_roc_auc_I-MEDICATION": 0.9718740532463399, "eval_roc_auc_I-PROCEDURE": 0.9035684876000457, "eval_roc_auc_I-SYMPTOM": 0.8852284244551051, "eval_roc_auc_O": 0.903531106648675, "eval_runtime": 61.221, "eval_samples_per_second": 177.602, "eval_steps_per_second": 22.215, "step": 79413 }, { "epoch": 3.0076687695969175, "grad_norm": 4.327937602996826, "learning_rate": 1.3987004646594387e-05, "loss": 0.0237, "step": 79616 }, { "epoch": 3.0173397302708627, "grad_norm": 1.6373872756958008, "learning_rate": 1.3967662725246499e-05, "loss": 0.022, "step": 79872 }, { "epoch": 3.0270106909448073, "grad_norm": 3.250305652618408, "learning_rate": 1.3948320803898607e-05, "loss": 0.0246, "step": 80128 }, { "epoch": 3.0366816516187525, "grad_norm": 1.6971690654754639, "learning_rate": 1.3928978882550717e-05, "loss": 0.0236, "step": 80384 }, { "epoch": 3.0463526122926976, "grad_norm": 5.478879451751709, "learning_rate": 1.3909636961202826e-05, "loss": 0.0219, "step": 80640 }, { "epoch": 3.0560235729666427, "grad_norm": 2.4806175231933594, "learning_rate": 1.3890295039854938e-05, "loss": 0.0236, "step": 80896 }, { "epoch": 3.065694533640588, "grad_norm": 1.5560436248779297, "learning_rate": 1.3870953118507046e-05, "loss": 0.0229, "step": 81152 }, { "epoch": 3.075365494314533, "grad_norm": 10.51571273803711, "learning_rate": 1.3851611197159156e-05, "loss": 0.0246, "step": 81408 }, { "epoch": 3.085036454988478, "grad_norm": 4.828140735626221, "learning_rate": 1.3832269275811265e-05, "loss": 0.0225, "step": 81664 }, { "epoch": 3.0947074156624232, "grad_norm": 0.9167439937591553, "learning_rate": 1.3812927354463376e-05, "loss": 0.0241, "step": 81920 }, { "epoch": 3.104378376336368, "grad_norm": 4.8147454261779785, "learning_rate": 1.3793585433115487e-05, "loss": 0.0226, "step": 82176 }, { "epoch": 3.114049337010313, "grad_norm": 5.181445121765137, "learning_rate": 1.3774243511767595e-05, "loss": 0.0202, "step": 82432 }, { "epoch": 3.123720297684258, "grad_norm": 1.4374691247940063, "learning_rate": 1.375497714479997e-05, "loss": 0.0221, "step": 82688 }, { "epoch": 3.1333912583582033, "grad_norm": 3.805264949798584, "learning_rate": 1.3735635223452082e-05, "loss": 0.0241, "step": 82944 }, { "epoch": 3.1430622190321484, "grad_norm": 3.3050577640533447, "learning_rate": 1.3716368856484456e-05, "loss": 0.0225, "step": 83200 }, { "epoch": 3.1527331797060936, "grad_norm": 1.6958988904953003, "learning_rate": 1.3697026935136567e-05, "loss": 0.0233, "step": 83456 }, { "epoch": 3.1624041403800387, "grad_norm": 2.1749958992004395, "learning_rate": 1.3677685013788675e-05, "loss": 0.024, "step": 83712 }, { "epoch": 3.1720751010539834, "grad_norm": 1.0387561321258545, "learning_rate": 1.3658343092440785e-05, "loss": 0.0235, "step": 83968 }, { "epoch": 3.1817460617279285, "grad_norm": 4.311033248901367, "learning_rate": 1.3639001171092895e-05, "loss": 0.025, "step": 84224 }, { "epoch": 3.1914170224018736, "grad_norm": 1.0629218816757202, "learning_rate": 1.3619659249745005e-05, "loss": 0.0242, "step": 84480 }, { "epoch": 3.2010879830758188, "grad_norm": 4.607676029205322, "learning_rate": 1.3600317328397114e-05, "loss": 0.0218, "step": 84736 }, { "epoch": 3.210758943749764, "grad_norm": 1.2249504327774048, "learning_rate": 1.3580975407049224e-05, "loss": 0.0217, "step": 84992 }, { "epoch": 3.220429904423709, "grad_norm": 3.8173789978027344, "learning_rate": 1.3561633485701336e-05, "loss": 0.0212, "step": 85248 }, { "epoch": 3.230100865097654, "grad_norm": 5.286001205444336, "learning_rate": 1.3542291564353444e-05, "loss": 0.0234, "step": 85504 }, { "epoch": 3.2397718257715993, "grad_norm": 0.9266921281814575, "learning_rate": 1.3522949643005554e-05, "loss": 0.0227, "step": 85760 }, { "epoch": 3.2494427864455444, "grad_norm": 1.9286329746246338, "learning_rate": 1.3503607721657663e-05, "loss": 0.0216, "step": 86016 }, { "epoch": 3.259113747119489, "grad_norm": 7.073780536651611, "learning_rate": 1.3484265800309775e-05, "loss": 0.0229, "step": 86272 }, { "epoch": 3.268784707793434, "grad_norm": 11.505043983459473, "learning_rate": 1.3464923878961885e-05, "loss": 0.0216, "step": 86528 }, { "epoch": 3.2784556684673793, "grad_norm": 2.463899850845337, "learning_rate": 1.3445581957613993e-05, "loss": 0.0238, "step": 86784 }, { "epoch": 3.2881266291413245, "grad_norm": 2.2217423915863037, "learning_rate": 1.3426240036266103e-05, "loss": 0.0244, "step": 87040 }, { "epoch": 3.2977975898152696, "grad_norm": 2.690162181854248, "learning_rate": 1.3406898114918213e-05, "loss": 0.0233, "step": 87296 }, { "epoch": 3.3074685504892147, "grad_norm": 0.6873595118522644, "learning_rate": 1.3387631747950589e-05, "loss": 0.0212, "step": 87552 }, { "epoch": 3.31713951116316, "grad_norm": 12.570816040039062, "learning_rate": 1.3368289826602698e-05, "loss": 0.021, "step": 87808 }, { "epoch": 3.3268104718371045, "grad_norm": 1.3393861055374146, "learning_rate": 1.3348947905254808e-05, "loss": 0.0204, "step": 88064 }, { "epoch": 3.3364814325110497, "grad_norm": 0.6096575856208801, "learning_rate": 1.332960598390692e-05, "loss": 0.0218, "step": 88320 }, { "epoch": 3.346152393184995, "grad_norm": 4.549058437347412, "learning_rate": 1.3310264062559028e-05, "loss": 0.0232, "step": 88576 }, { "epoch": 3.35582335385894, "grad_norm": 2.827040195465088, "learning_rate": 1.3290922141211138e-05, "loss": 0.023, "step": 88832 }, { "epoch": 3.365494314532885, "grad_norm": 4.081531524658203, "learning_rate": 1.3271580219863246e-05, "loss": 0.0202, "step": 89088 }, { "epoch": 3.37516527520683, "grad_norm": 16.549381256103516, "learning_rate": 1.3252238298515358e-05, "loss": 0.023, "step": 89344 }, { "epoch": 3.3848362358807753, "grad_norm": 1.166902780532837, "learning_rate": 1.3232896377167468e-05, "loss": 0.0217, "step": 89600 }, { "epoch": 3.3945071965547204, "grad_norm": 1.8393259048461914, "learning_rate": 1.3213554455819577e-05, "loss": 0.0226, "step": 89856 }, { "epoch": 3.4041781572286656, "grad_norm": 3.180155038833618, "learning_rate": 1.3194288088851953e-05, "loss": 0.0216, "step": 90112 }, { "epoch": 3.4138491179026103, "grad_norm": 0.5230717658996582, "learning_rate": 1.3174946167504061e-05, "loss": 0.022, "step": 90368 }, { "epoch": 3.4235200785765554, "grad_norm": 15.037604331970215, "learning_rate": 1.3155604246156173e-05, "loss": 0.0234, "step": 90624 }, { "epoch": 3.4331910392505005, "grad_norm": 11.155952453613281, "learning_rate": 1.3136262324808283e-05, "loss": 0.0224, "step": 90880 }, { "epoch": 3.4428619999244456, "grad_norm": 2.1080737113952637, "learning_rate": 1.3116920403460391e-05, "loss": 0.023, "step": 91136 }, { "epoch": 3.4525329605983908, "grad_norm": 0.7303668856620789, "learning_rate": 1.3097578482112501e-05, "loss": 0.0217, "step": 91392 }, { "epoch": 3.462203921272336, "grad_norm": 0.6222452521324158, "learning_rate": 1.3078236560764612e-05, "loss": 0.0219, "step": 91648 }, { "epoch": 3.471874881946281, "grad_norm": 0.8807787299156189, "learning_rate": 1.3058894639416722e-05, "loss": 0.0236, "step": 91904 }, { "epoch": 3.4815458426202257, "grad_norm": 0.9494552612304688, "learning_rate": 1.3039628272449096e-05, "loss": 0.0208, "step": 92160 }, { "epoch": 3.491216803294171, "grad_norm": 14.498435020446777, "learning_rate": 1.3020286351101206e-05, "loss": 0.0241, "step": 92416 }, { "epoch": 3.500887763968116, "grad_norm": 1.3170123100280762, "learning_rate": 1.3000944429753314e-05, "loss": 0.0213, "step": 92672 }, { "epoch": 3.510558724642061, "grad_norm": 1.3304574489593506, "learning_rate": 1.2981602508405426e-05, "loss": 0.0216, "step": 92928 }, { "epoch": 3.520229685316006, "grad_norm": 0.5469146966934204, "learning_rate": 1.2962260587057536e-05, "loss": 0.0198, "step": 93184 }, { "epoch": 3.5299006459899513, "grad_norm": 1.7149643898010254, "learning_rate": 1.2942918665709645e-05, "loss": 0.0197, "step": 93440 }, { "epoch": 3.5395716066638965, "grad_norm": 2.092782974243164, "learning_rate": 1.292365229874202e-05, "loss": 0.0225, "step": 93696 }, { "epoch": 3.549242567337841, "grad_norm": 1.180370807647705, "learning_rate": 1.2904310377394132e-05, "loss": 0.0204, "step": 93952 }, { "epoch": 3.5589135280117867, "grad_norm": 16.842605590820312, "learning_rate": 1.288496845604624e-05, "loss": 0.0214, "step": 94208 }, { "epoch": 3.5685844886857314, "grad_norm": 3.62001895904541, "learning_rate": 1.286562653469835e-05, "loss": 0.0241, "step": 94464 }, { "epoch": 3.5782554493596765, "grad_norm": 2.4327309131622314, "learning_rate": 1.284628461335046e-05, "loss": 0.0192, "step": 94720 }, { "epoch": 3.5879264100336217, "grad_norm": 5.820268154144287, "learning_rate": 1.2826942692002571e-05, "loss": 0.0223, "step": 94976 }, { "epoch": 3.597597370707567, "grad_norm": 3.0629537105560303, "learning_rate": 1.280760077065468e-05, "loss": 0.02, "step": 95232 }, { "epoch": 3.607268331381512, "grad_norm": 2.9143710136413574, "learning_rate": 1.278825884930679e-05, "loss": 0.0198, "step": 95488 }, { "epoch": 3.616939292055457, "grad_norm": 1.7662220001220703, "learning_rate": 1.2768916927958898e-05, "loss": 0.0225, "step": 95744 }, { "epoch": 3.626610252729402, "grad_norm": 3.5561130046844482, "learning_rate": 1.274957500661101e-05, "loss": 0.0228, "step": 96000 }, { "epoch": 3.636281213403347, "grad_norm": 5.7032470703125, "learning_rate": 1.273023308526312e-05, "loss": 0.0208, "step": 96256 }, { "epoch": 3.645952174077292, "grad_norm": 1.8125163316726685, "learning_rate": 1.2710891163915228e-05, "loss": 0.0212, "step": 96512 }, { "epoch": 3.655623134751237, "grad_norm": 4.229706764221191, "learning_rate": 1.2691549242567338e-05, "loss": 0.0238, "step": 96768 }, { "epoch": 3.6652940954251823, "grad_norm": 1.200060486793518, "learning_rate": 1.267220732121945e-05, "loss": 0.0189, "step": 97024 }, { "epoch": 3.6749650560991274, "grad_norm": 3.1629979610443115, "learning_rate": 1.2652865399871559e-05, "loss": 0.0207, "step": 97280 }, { "epoch": 3.6846360167730725, "grad_norm": 3.1145436763763428, "learning_rate": 1.2633523478523669e-05, "loss": 0.0201, "step": 97536 }, { "epoch": 3.6943069774470176, "grad_norm": 2.600019693374634, "learning_rate": 1.2614181557175777e-05, "loss": 0.0208, "step": 97792 }, { "epoch": 3.7039779381209623, "grad_norm": 3.338853120803833, "learning_rate": 1.2594915190208153e-05, "loss": 0.0209, "step": 98048 }, { "epoch": 3.713648898794908, "grad_norm": 6.754782676696777, "learning_rate": 1.2575573268860263e-05, "loss": 0.0187, "step": 98304 }, { "epoch": 3.7233198594688526, "grad_norm": 3.4177420139312744, "learning_rate": 1.2556231347512373e-05, "loss": 0.0216, "step": 98560 }, { "epoch": 3.7329908201427977, "grad_norm": 1.6833980083465576, "learning_rate": 1.2536889426164482e-05, "loss": 0.0202, "step": 98816 }, { "epoch": 3.742661780816743, "grad_norm": 2.7053074836730957, "learning_rate": 1.2517547504816592e-05, "loss": 0.0191, "step": 99072 }, { "epoch": 3.752332741490688, "grad_norm": 2.020542621612549, "learning_rate": 1.2498205583468704e-05, "loss": 0.0195, "step": 99328 }, { "epoch": 3.762003702164633, "grad_norm": 4.648097515106201, "learning_rate": 1.2478863662120812e-05, "loss": 0.0208, "step": 99584 }, { "epoch": 3.771674662838578, "grad_norm": 1.1168490648269653, "learning_rate": 1.2459521740772922e-05, "loss": 0.0194, "step": 99840 }, { "epoch": 3.7813456235125233, "grad_norm": 0.9811049103736877, "learning_rate": 1.2440255373805296e-05, "loss": 0.0207, "step": 100096 }, { "epoch": 3.791016584186468, "grad_norm": 3.2866318225860596, "learning_rate": 1.2420913452457408e-05, "loss": 0.0208, "step": 100352 }, { "epoch": 3.800687544860413, "grad_norm": 1.0944135189056396, "learning_rate": 1.2401571531109518e-05, "loss": 0.0216, "step": 100608 }, { "epoch": 3.8103585055343583, "grad_norm": 1.5802284479141235, "learning_rate": 1.2382229609761627e-05, "loss": 0.0215, "step": 100864 }, { "epoch": 3.8200294662083034, "grad_norm": 15.70626163482666, "learning_rate": 1.2362887688413737e-05, "loss": 0.0224, "step": 101120 }, { "epoch": 3.8297004268822485, "grad_norm": 2.241199016571045, "learning_rate": 1.2343545767065847e-05, "loss": 0.0203, "step": 101376 }, { "epoch": 3.8393713875561937, "grad_norm": 1.7189942598342896, "learning_rate": 1.2324203845717957e-05, "loss": 0.0202, "step": 101632 }, { "epoch": 3.849042348230139, "grad_norm": 3.025250196456909, "learning_rate": 1.2304861924370065e-05, "loss": 0.0214, "step": 101888 }, { "epoch": 3.8587133089040835, "grad_norm": 0.44517338275909424, "learning_rate": 1.2285595557402441e-05, "loss": 0.0213, "step": 102144 }, { "epoch": 3.868384269578029, "grad_norm": 1.4796372652053833, "learning_rate": 1.226625363605455e-05, "loss": 0.0214, "step": 102400 }, { "epoch": 3.8780552302519737, "grad_norm": 3.5950703620910645, "learning_rate": 1.2246911714706661e-05, "loss": 0.0195, "step": 102656 }, { "epoch": 3.887726190925919, "grad_norm": 3.87646222114563, "learning_rate": 1.2227569793358771e-05, "loss": 0.0194, "step": 102912 }, { "epoch": 3.897397151599864, "grad_norm": 6.18682336807251, "learning_rate": 1.220822787201088e-05, "loss": 0.0186, "step": 103168 }, { "epoch": 3.907068112273809, "grad_norm": 4.5957207679748535, "learning_rate": 1.218888595066299e-05, "loss": 0.0193, "step": 103424 }, { "epoch": 3.9167390729477543, "grad_norm": 11.035417556762695, "learning_rate": 1.2169544029315102e-05, "loss": 0.0193, "step": 103680 }, { "epoch": 3.9264100336216994, "grad_norm": 1.2352112531661987, "learning_rate": 1.215020210796721e-05, "loss": 0.0181, "step": 103936 }, { "epoch": 3.9360809942956445, "grad_norm": 0.506557047367096, "learning_rate": 1.213086018661932e-05, "loss": 0.0187, "step": 104192 }, { "epoch": 3.945751954969589, "grad_norm": 2.8846030235290527, "learning_rate": 1.2111593819651694e-05, "loss": 0.0196, "step": 104448 }, { "epoch": 3.9554229156435343, "grad_norm": 1.4602288007736206, "learning_rate": 1.2092251898303806e-05, "loss": 0.0192, "step": 104704 }, { "epoch": 3.9650938763174794, "grad_norm": 13.7206392288208, "learning_rate": 1.207298553133618e-05, "loss": 0.0191, "step": 104960 }, { "epoch": 3.9747648369914246, "grad_norm": 1.7238157987594604, "learning_rate": 1.205364360998829e-05, "loss": 0.0201, "step": 105216 }, { "epoch": 3.9844357976653697, "grad_norm": 2.0438215732574463, "learning_rate": 1.20343016886404e-05, "loss": 0.0182, "step": 105472 }, { "epoch": 3.994106758339315, "grad_norm": 1.2184364795684814, "learning_rate": 1.2014959767292509e-05, "loss": 0.019, "step": 105728 }, { "epoch": 4.0, "eval_f1_B-DISEASE": 0.808675320486979, "eval_f1_B-MEDICATION": 0.9266381766381766, "eval_f1_B-PROCEDURE": 0.8295174204717594, "eval_f1_B-SYMPTOM": 0.7980295566502463, "eval_f1_I-DISEASE": 0.8489476041200179, "eval_f1_I-MEDICATION": 0.9397869262133368, "eval_f1_I-PROCEDURE": 0.847453216112688, "eval_f1_I-SYMPTOM": 0.8079173624618605, "eval_f1_O": 0.9249064875950979, "eval_f1_macro": 0.8590968967500181, "eval_f1_micro": 0.8881361203919136, "eval_loss": 0.4215824604034424, "eval_precision_B-DISEASE": 0.8221985408640052, "eval_precision_B-MEDICATION": 0.927133440228042, "eval_precision_B-PROCEDURE": 0.8393623543838136, "eval_precision_B-SYMPTOM": 0.8061812467260345, "eval_precision_I-DISEASE": 0.8662832447676619, "eval_precision_I-MEDICATION": 0.9348440443793176, "eval_precision_I-PROCEDURE": 0.8446231286120726, "eval_precision_I-SYMPTOM": 0.8113340449395586, "eval_precision_O": 0.9229153657042629, "eval_precision_macro": 0.8638750456227521, "eval_precision_micro": 0.88976580540172, "eval_rauc_macro": 0.9166627680103409, "eval_rauc_micro": 0.9362568980657558, "eval_recall_B-DISEASE": 0.795589751725232, "eval_recall_B-MEDICATION": 0.9261434418935753, "eval_recall_B-PROCEDURE": 0.8199007529089665, "eval_recall_B-SYMPTOM": 0.7900410677618069, "eval_recall_I-DISEASE": 0.8322921762691454, "eval_recall_I-MEDICATION": 0.9447823557412599, "eval_recall_I-PROCEDURE": 0.8503023329824475, "eval_recall_I-SYMPTOM": 0.8045293359123147, "eval_recall_O": 0.9269062194588178, "eval_recall_macro": 0.8544986038503963, "eval_recall_micro": 0.886512394293185, "eval_roc_auc_B-DISEASE": 0.8961942453514548, "eval_roc_auc_B-MEDICATION": 0.9629229597024016, "eval_roc_auc_B-PROCEDURE": 0.9085987950526558, "eval_roc_auc_B-SYMPTOM": 0.8929666290562625, "eval_roc_auc_I-DISEASE": 0.9077573774887775, "eval_roc_auc_I-MEDICATION": 0.9719339273090749, "eval_roc_auc_I-PROCEDURE": 0.9175308611542671, "eval_roc_auc_I-SYMPTOM": 0.8876856873124548, "eval_roc_auc_O": 0.9043744296657181, "eval_runtime": 61.4869, "eval_samples_per_second": 176.835, "eval_steps_per_second": 22.119, "step": 105884 }, { "epoch": 4.00377771901326, "grad_norm": 0.7948421835899353, "learning_rate": 1.199561784594462e-05, "loss": 0.0178, "step": 105984 }, { "epoch": 4.013448679687205, "grad_norm": 0.8098104596138, "learning_rate": 1.1976275924596729e-05, "loss": 0.0143, "step": 106240 }, { "epoch": 4.02311964036115, "grad_norm": 3.08135724067688, "learning_rate": 1.195693400324884e-05, "loss": 0.0134, "step": 106496 }, { "epoch": 4.032790601035095, "grad_norm": 1.1449787616729736, "learning_rate": 1.1937592081900948e-05, "loss": 0.0133, "step": 106752 }, { "epoch": 4.0424615617090405, "grad_norm": 2.6626508235931396, "learning_rate": 1.1918325714933323e-05, "loss": 0.0136, "step": 107008 }, { "epoch": 4.052132522382985, "grad_norm": 1.2061920166015625, "learning_rate": 1.1898983793585435e-05, "loss": 0.0145, "step": 107264 }, { "epoch": 4.06180348305693, "grad_norm": 0.4288395941257477, "learning_rate": 1.1879641872237544e-05, "loss": 0.0127, "step": 107520 }, { "epoch": 4.071474443730875, "grad_norm": 3.4469873905181885, "learning_rate": 1.1860299950889654e-05, "loss": 0.0143, "step": 107776 }, { "epoch": 4.08114540440482, "grad_norm": 3.4651808738708496, "learning_rate": 1.1840958029541762e-05, "loss": 0.0137, "step": 108032 }, { "epoch": 4.090816365078766, "grad_norm": 4.205618381500244, "learning_rate": 1.1821616108193874e-05, "loss": 0.0136, "step": 108288 }, { "epoch": 4.10048732575271, "grad_norm": 0.8337300419807434, "learning_rate": 1.1802274186845984e-05, "loss": 0.0126, "step": 108544 }, { "epoch": 4.110158286426656, "grad_norm": 5.309538841247559, "learning_rate": 1.1782932265498093e-05, "loss": 0.0137, "step": 108800 }, { "epoch": 4.119829247100601, "grad_norm": 0.6177698969841003, "learning_rate": 1.1763590344150204e-05, "loss": 0.0147, "step": 109056 }, { "epoch": 4.129500207774546, "grad_norm": 2.2366254329681396, "learning_rate": 1.1744248422802313e-05, "loss": 0.0144, "step": 109312 }, { "epoch": 4.139171168448491, "grad_norm": 1.5923917293548584, "learning_rate": 1.1724906501454423e-05, "loss": 0.0124, "step": 109568 }, { "epoch": 4.1488421291224356, "grad_norm": 0.6197337508201599, "learning_rate": 1.1705564580106531e-05, "loss": 0.0128, "step": 109824 }, { "epoch": 4.158513089796381, "grad_norm": 1.5513421297073364, "learning_rate": 1.1686222658758643e-05, "loss": 0.0133, "step": 110080 }, { "epoch": 4.168184050470326, "grad_norm": 0.4733668863773346, "learning_rate": 1.1666880737410753e-05, "loss": 0.013, "step": 110336 }, { "epoch": 4.177855011144271, "grad_norm": 0.9195311069488525, "learning_rate": 1.1647538816062862e-05, "loss": 0.0147, "step": 110592 }, { "epoch": 4.187525971818216, "grad_norm": 0.4619844853878021, "learning_rate": 1.1628272449095237e-05, "loss": 0.0149, "step": 110848 }, { "epoch": 4.197196932492162, "grad_norm": 0.4427216351032257, "learning_rate": 1.1608930527747346e-05, "loss": 0.0146, "step": 111104 }, { "epoch": 4.206867893166106, "grad_norm": 2.1087565422058105, "learning_rate": 1.1589588606399458e-05, "loss": 0.0141, "step": 111360 }, { "epoch": 4.216538853840051, "grad_norm": 1.2194585800170898, "learning_rate": 1.1570246685051568e-05, "loss": 0.0164, "step": 111616 }, { "epoch": 4.226209814513997, "grad_norm": 1.4488071203231812, "learning_rate": 1.1550904763703676e-05, "loss": 0.0158, "step": 111872 }, { "epoch": 4.235880775187941, "grad_norm": 5.222316265106201, "learning_rate": 1.1531562842355786e-05, "loss": 0.0144, "step": 112128 }, { "epoch": 4.245551735861887, "grad_norm": 0.9832548499107361, "learning_rate": 1.1512220921007897e-05, "loss": 0.0148, "step": 112384 }, { "epoch": 4.2552226965358315, "grad_norm": 3.2491071224212646, "learning_rate": 1.1492878999660007e-05, "loss": 0.0141, "step": 112640 }, { "epoch": 4.264893657209777, "grad_norm": 0.40463200211524963, "learning_rate": 1.1473537078312115e-05, "loss": 0.0153, "step": 112896 }, { "epoch": 4.274564617883722, "grad_norm": 0.7759467363357544, "learning_rate": 1.1454195156964225e-05, "loss": 0.0137, "step": 113152 }, { "epoch": 4.2842355785576665, "grad_norm": 5.856504917144775, "learning_rate": 1.1434853235616337e-05, "loss": 0.0154, "step": 113408 }, { "epoch": 4.293906539231612, "grad_norm": 1.3605588674545288, "learning_rate": 1.1415586868648711e-05, "loss": 0.0154, "step": 113664 }, { "epoch": 4.303577499905557, "grad_norm": 0.4841889441013336, "learning_rate": 1.1396244947300821e-05, "loss": 0.0143, "step": 113920 }, { "epoch": 4.313248460579502, "grad_norm": 2.290330171585083, "learning_rate": 1.137690302595293e-05, "loss": 0.0124, "step": 114176 }, { "epoch": 4.322919421253447, "grad_norm": 0.6299089193344116, "learning_rate": 1.135756110460504e-05, "loss": 0.015, "step": 114432 }, { "epoch": 4.3325903819273925, "grad_norm": 2.6966304779052734, "learning_rate": 1.1338219183257152e-05, "loss": 0.0147, "step": 114688 }, { "epoch": 4.342261342601337, "grad_norm": 3.5934536457061768, "learning_rate": 1.131887726190926e-05, "loss": 0.0135, "step": 114944 }, { "epoch": 4.351932303275283, "grad_norm": 2.391207218170166, "learning_rate": 1.129953534056137e-05, "loss": 0.0127, "step": 115200 }, { "epoch": 4.3616032639492275, "grad_norm": 7.104966640472412, "learning_rate": 1.1280193419213479e-05, "loss": 0.0127, "step": 115456 }, { "epoch": 4.371274224623172, "grad_norm": 4.91796875, "learning_rate": 1.126085149786559e-05, "loss": 0.0136, "step": 115712 }, { "epoch": 4.380945185297118, "grad_norm": 0.9888034462928772, "learning_rate": 1.1241509576517699e-05, "loss": 0.0143, "step": 115968 }, { "epoch": 4.390616145971062, "grad_norm": 5.1639018058776855, "learning_rate": 1.1222167655169809e-05, "loss": 0.0127, "step": 116224 }, { "epoch": 4.400287106645008, "grad_norm": 8.415788650512695, "learning_rate": 1.1202825733821917e-05, "loss": 0.0145, "step": 116480 }, { "epoch": 4.409958067318953, "grad_norm": 4.235672950744629, "learning_rate": 1.118348381247403e-05, "loss": 0.0154, "step": 116736 }, { "epoch": 4.419629027992898, "grad_norm": 1.2345690727233887, "learning_rate": 1.116414189112614e-05, "loss": 0.0143, "step": 116992 }, { "epoch": 4.429299988666843, "grad_norm": 8.792167663574219, "learning_rate": 1.1144799969778248e-05, "loss": 0.0162, "step": 117248 }, { "epoch": 4.438970949340788, "grad_norm": 2.2384276390075684, "learning_rate": 1.1125533602810623e-05, "loss": 0.0141, "step": 117504 }, { "epoch": 4.448641910014733, "grad_norm": 1.041791558265686, "learning_rate": 1.1106191681462735e-05, "loss": 0.0136, "step": 117760 }, { "epoch": 4.458312870688678, "grad_norm": 4.7409515380859375, "learning_rate": 1.1086849760114844e-05, "loss": 0.0136, "step": 118016 }, { "epoch": 4.4679838313626234, "grad_norm": 0.2378958761692047, "learning_rate": 1.1067507838766954e-05, "loss": 0.0147, "step": 118272 }, { "epoch": 4.477654792036568, "grad_norm": 0.31944742798805237, "learning_rate": 1.1048165917419062e-05, "loss": 0.0135, "step": 118528 }, { "epoch": 4.487325752710514, "grad_norm": 1.0051418542861938, "learning_rate": 1.1028899550451438e-05, "loss": 0.0137, "step": 118784 }, { "epoch": 4.496996713384458, "grad_norm": 1.6819483041763306, "learning_rate": 1.1009557629103548e-05, "loss": 0.0156, "step": 119040 }, { "epoch": 4.506667674058404, "grad_norm": 0.40426477789878845, "learning_rate": 1.0990215707755658e-05, "loss": 0.0146, "step": 119296 }, { "epoch": 4.516338634732349, "grad_norm": 3.140392303466797, "learning_rate": 1.0970873786407768e-05, "loss": 0.0129, "step": 119552 }, { "epoch": 4.526009595406293, "grad_norm": 4.579553604125977, "learning_rate": 1.0951531865059877e-05, "loss": 0.0124, "step": 119808 }, { "epoch": 4.535680556080239, "grad_norm": 5.144607067108154, "learning_rate": 1.0932189943711989e-05, "loss": 0.0153, "step": 120064 }, { "epoch": 4.545351516754184, "grad_norm": 0.48044607043266296, "learning_rate": 1.0912848022364097e-05, "loss": 0.013, "step": 120320 }, { "epoch": 4.555022477428129, "grad_norm": 42.02862548828125, "learning_rate": 1.0893506101016207e-05, "loss": 0.0137, "step": 120576 }, { "epoch": 4.564693438102074, "grad_norm": 6.357956886291504, "learning_rate": 1.0874239734048581e-05, "loss": 0.0154, "step": 120832 }, { "epoch": 4.574364398776019, "grad_norm": 1.7824290990829468, "learning_rate": 1.0854897812700693e-05, "loss": 0.0127, "step": 121088 }, { "epoch": 4.584035359449964, "grad_norm": 1.5723809003829956, "learning_rate": 1.0835555891352803e-05, "loss": 0.0129, "step": 121344 }, { "epoch": 4.593706320123909, "grad_norm": 0.8262581825256348, "learning_rate": 1.0816213970004911e-05, "loss": 0.0151, "step": 121600 }, { "epoch": 4.603377280797854, "grad_norm": 0.8032371401786804, "learning_rate": 1.0796872048657022e-05, "loss": 0.0158, "step": 121856 }, { "epoch": 4.613048241471799, "grad_norm": 0.9602940082550049, "learning_rate": 1.0777530127309133e-05, "loss": 0.0142, "step": 122112 }, { "epoch": 4.622719202145745, "grad_norm": 1.9872223138809204, "learning_rate": 1.0758188205961242e-05, "loss": 0.0134, "step": 122368 }, { "epoch": 4.632390162819689, "grad_norm": 5.095240116119385, "learning_rate": 1.0738846284613352e-05, "loss": 0.014, "step": 122624 }, { "epoch": 4.642061123493635, "grad_norm": 5.41306209564209, "learning_rate": 1.071950436326546e-05, "loss": 0.0124, "step": 122880 }, { "epoch": 4.6517320841675796, "grad_norm": 10.439872741699219, "learning_rate": 1.0700162441917572e-05, "loss": 0.0148, "step": 123136 }, { "epoch": 4.661403044841524, "grad_norm": 0.26815417408943176, "learning_rate": 1.068082052056968e-05, "loss": 0.0132, "step": 123392 }, { "epoch": 4.67107400551547, "grad_norm": 0.6603275537490845, "learning_rate": 1.0661554153602056e-05, "loss": 0.0116, "step": 123648 }, { "epoch": 4.6807449661894145, "grad_norm": 7.041431903839111, "learning_rate": 1.0642212232254165e-05, "loss": 0.0129, "step": 123904 }, { "epoch": 4.69041592686336, "grad_norm": 0.7987198233604431, "learning_rate": 1.0622870310906275e-05, "loss": 0.0129, "step": 124160 }, { "epoch": 4.700086887537305, "grad_norm": 0.14286652207374573, "learning_rate": 1.0603528389558387e-05, "loss": 0.0124, "step": 124416 }, { "epoch": 4.70975784821125, "grad_norm": 0.9553490877151489, "learning_rate": 1.0584186468210495e-05, "loss": 0.0124, "step": 124672 }, { "epoch": 4.719428808885195, "grad_norm": 0.6493328809738159, "learning_rate": 1.0564844546862605e-05, "loss": 0.012, "step": 124928 }, { "epoch": 4.729099769559141, "grad_norm": 0.2133503556251526, "learning_rate": 1.0545502625514714e-05, "loss": 0.014, "step": 125184 }, { "epoch": 4.738770730233085, "grad_norm": 2.6697592735290527, "learning_rate": 1.0526160704166826e-05, "loss": 0.013, "step": 125440 }, { "epoch": 4.748441690907031, "grad_norm": 0.8658607602119446, "learning_rate": 1.0506894337199201e-05, "loss": 0.0131, "step": 125696 }, { "epoch": 4.7581126515809755, "grad_norm": 1.9833451509475708, "learning_rate": 1.048755241585131e-05, "loss": 0.013, "step": 125952 }, { "epoch": 4.76778361225492, "grad_norm": 0.19457457959651947, "learning_rate": 1.046821049450342e-05, "loss": 0.0142, "step": 126208 }, { "epoch": 4.777454572928866, "grad_norm": 0.9841840863227844, "learning_rate": 1.044886857315553e-05, "loss": 0.0123, "step": 126464 }, { "epoch": 4.7871255336028105, "grad_norm": 1.9175962209701538, "learning_rate": 1.042952665180764e-05, "loss": 0.0136, "step": 126720 }, { "epoch": 4.796796494276756, "grad_norm": 1.7247178554534912, "learning_rate": 1.0410184730459748e-05, "loss": 0.0136, "step": 126976 }, { "epoch": 4.806467454950701, "grad_norm": 2.0652236938476562, "learning_rate": 1.0390842809111859e-05, "loss": 0.0112, "step": 127232 }, { "epoch": 4.816138415624646, "grad_norm": 1.1892759799957275, "learning_rate": 1.037150088776397e-05, "loss": 0.0125, "step": 127488 }, { "epoch": 4.825809376298591, "grad_norm": 29.004505157470703, "learning_rate": 1.0352158966416079e-05, "loss": 0.0112, "step": 127744 }, { "epoch": 4.835480336972536, "grad_norm": 0.5393902659416199, "learning_rate": 1.0332817045068189e-05, "loss": 0.0124, "step": 128000 }, { "epoch": 4.845151297646481, "grad_norm": 0.9426546096801758, "learning_rate": 1.0313475123720297e-05, "loss": 0.011, "step": 128256 }, { "epoch": 4.854822258320426, "grad_norm": 1.1209189891815186, "learning_rate": 1.029413320237241e-05, "loss": 0.0117, "step": 128512 }, { "epoch": 4.8644932189943715, "grad_norm": 0.8620722889900208, "learning_rate": 1.027479128102452e-05, "loss": 0.0121, "step": 128768 }, { "epoch": 4.874164179668316, "grad_norm": 0.14039096236228943, "learning_rate": 1.0255524914056893e-05, "loss": 0.0145, "step": 129024 }, { "epoch": 4.883835140342262, "grad_norm": 0.5701342821121216, "learning_rate": 1.0236182992709003e-05, "loss": 0.0114, "step": 129280 }, { "epoch": 4.893506101016206, "grad_norm": 0.802097499370575, "learning_rate": 1.0216841071361112e-05, "loss": 0.0124, "step": 129536 }, { "epoch": 4.903177061690151, "grad_norm": 0.49824589490890503, "learning_rate": 1.0197499150013224e-05, "loss": 0.0133, "step": 129792 }, { "epoch": 4.912848022364097, "grad_norm": 1.2195169925689697, "learning_rate": 1.0178157228665332e-05, "loss": 0.0134, "step": 130048 }, { "epoch": 4.922518983038041, "grad_norm": 23.420486450195312, "learning_rate": 1.0158815307317442e-05, "loss": 0.0125, "step": 130304 }, { "epoch": 4.932189943711987, "grad_norm": 0.8538005948066711, "learning_rate": 1.013947338596955e-05, "loss": 0.0124, "step": 130560 }, { "epoch": 4.941860904385932, "grad_norm": 3.506213903427124, "learning_rate": 1.0120131464621663e-05, "loss": 0.0125, "step": 130816 }, { "epoch": 4.951531865059877, "grad_norm": 0.5479403734207153, "learning_rate": 1.0100789543273773e-05, "loss": 0.0137, "step": 131072 }, { "epoch": 4.961202825733822, "grad_norm": 0.24773092567920685, "learning_rate": 1.0081447621925881e-05, "loss": 0.011, "step": 131328 }, { "epoch": 4.970873786407767, "grad_norm": 0.3392595946788788, "learning_rate": 1.0062105700577991e-05, "loss": 0.0113, "step": 131584 }, { "epoch": 4.980544747081712, "grad_norm": 9.13214111328125, "learning_rate": 1.0042763779230103e-05, "loss": 0.0103, "step": 131840 }, { "epoch": 4.990215707755657, "grad_norm": 0.6798635721206665, "learning_rate": 1.0023421857882212e-05, "loss": 0.0112, "step": 132096 }, { "epoch": 4.999886668429602, "grad_norm": 3.8642024993896484, "learning_rate": 1.0004155490914587e-05, "loss": 0.0132, "step": 132352 }, { "epoch": 5.0, "eval_f1_B-DISEASE": 0.8076930929582066, "eval_f1_B-MEDICATION": 0.9320128479657388, "eval_f1_B-PROCEDURE": 0.8299265901797881, "eval_f1_B-SYMPTOM": 0.802206734344465, "eval_f1_I-DISEASE": 0.8443607103813086, "eval_f1_I-MEDICATION": 0.9473012496676416, "eval_f1_I-PROCEDURE": 0.8478063590704366, "eval_f1_I-SYMPTOM": 0.8098297732572185, "eval_f1_O": 0.9250181548686375, "eval_f1_macro": 0.8606839458548268, "eval_f1_micro": 0.8883442580707792, "eval_loss": 0.4413922429084778, "eval_precision_B-DISEASE": 0.83231643172733, "eval_precision_B-MEDICATION": 0.9345142243692969, "eval_precision_B-PROCEDURE": 0.8534490552391285, "eval_precision_B-SYMPTOM": 0.8108590008041114, "eval_precision_I-DISEASE": 0.8482866891605315, "eval_precision_I-MEDICATION": 0.952467518579907, "eval_precision_I-PROCEDURE": 0.8695185973099094, "eval_precision_I-SYMPTOM": 0.8183803238035672, "eval_precision_O": 0.9206075463451251, "eval_precision_macro": 0.8711554874821008, "eval_precision_micro": 0.8905099095411801, "eval_rauc_macro": 0.9146732071749695, "eval_rauc_micro": 0.9361508470284178, "eval_recall_B-DISEASE": 0.784484810026176, "eval_recall_B-MEDICATION": 0.9295248264815803, "eval_recall_B-PROCEDURE": 0.8076659822039699, "eval_recall_B-SYMPTOM": 0.7937371663244354, "eval_recall_I-DISEASE": 0.8404709040504773, "eval_recall_I-MEDICATION": 0.9421907230126408, "eval_recall_I-PROCEDURE": 0.8271520314654786, "eval_recall_I-SYMPTOM": 0.8014560498603052, "eval_recall_O": 0.9294712290821007, "eval_recall_macro": 0.8506837469452404, "eval_recall_micro": 0.8861891144410676, "eval_roc_auc_B-DISEASE": 0.8907720240571226, "eval_roc_auc_B-MEDICATION": 0.964629291931633, "eval_roc_auc_B-PROCEDURE": 0.9026383818040776, "eval_roc_auc_B-SYMPTOM": 0.8948664885493695, "eval_roc_auc_I-DISEASE": 0.9104202960745905, "eval_roc_auc_I-MEDICATION": 0.9707688588965032, "eval_roc_auc_I-PROCEDURE": 0.9075291450310622, "eval_roc_auc_I-SYMPTOM": 0.8868675259287415, "eval_roc_auc_O": 0.9035668523016245, "eval_runtime": 61.1266, "eval_samples_per_second": 177.877, "eval_steps_per_second": 22.249, "step": 132355 }, { "epoch": 5.009557629103547, "grad_norm": 0.4311939775943756, "learning_rate": 9.984813569566696e-06, "loss": 0.0071, "step": 132608 }, { "epoch": 5.019228589777493, "grad_norm": 1.0362738370895386, "learning_rate": 9.965471648218806e-06, "loss": 0.0086, "step": 132864 }, { "epoch": 5.028899550451437, "grad_norm": 1.9174968004226685, "learning_rate": 9.946129726870916e-06, "loss": 0.0069, "step": 133120 }, { "epoch": 5.038570511125383, "grad_norm": 0.2945270240306854, "learning_rate": 9.926787805523026e-06, "loss": 0.0086, "step": 133376 }, { "epoch": 5.048241471799328, "grad_norm": 0.024452047422528267, "learning_rate": 9.907445884175136e-06, "loss": 0.0068, "step": 133632 }, { "epoch": 5.057912432473272, "grad_norm": 0.9929279088973999, "learning_rate": 9.888179517207512e-06, "loss": 0.0077, "step": 133888 }, { "epoch": 5.067583393147218, "grad_norm": 3.7908236980438232, "learning_rate": 9.86883759585962e-06, "loss": 0.0088, "step": 134144 }, { "epoch": 5.0772543538211625, "grad_norm": 1.9607737064361572, "learning_rate": 9.84949567451173e-06, "loss": 0.0077, "step": 134400 }, { "epoch": 5.086925314495108, "grad_norm": 4.287957668304443, "learning_rate": 9.83015375316384e-06, "loss": 0.0091, "step": 134656 }, { "epoch": 5.096596275169053, "grad_norm": 0.9552005529403687, "learning_rate": 9.81081183181595e-06, "loss": 0.0081, "step": 134912 }, { "epoch": 5.106267235842998, "grad_norm": 3.186450481414795, "learning_rate": 9.79146991046806e-06, "loss": 0.0074, "step": 135168 }, { "epoch": 5.115938196516943, "grad_norm": 0.3131207525730133, "learning_rate": 9.772127989120171e-06, "loss": 0.0076, "step": 135424 }, { "epoch": 5.125609157190888, "grad_norm": 0.2811489701271057, "learning_rate": 9.75278606777228e-06, "loss": 0.009, "step": 135680 }, { "epoch": 5.135280117864833, "grad_norm": 0.3740166127681732, "learning_rate": 9.73344414642439e-06, "loss": 0.0078, "step": 135936 }, { "epoch": 5.144951078538778, "grad_norm": 1.3230619430541992, "learning_rate": 9.7141022250765e-06, "loss": 0.008, "step": 136192 }, { "epoch": 5.1546220392127235, "grad_norm": 0.6120270490646362, "learning_rate": 9.694835858108874e-06, "loss": 0.0073, "step": 136448 }, { "epoch": 5.164292999886668, "grad_norm": 4.456196308135986, "learning_rate": 9.675493936760985e-06, "loss": 0.01, "step": 136704 }, { "epoch": 5.173963960560614, "grad_norm": 0.2429385632276535, "learning_rate": 9.656152015413094e-06, "loss": 0.0088, "step": 136960 }, { "epoch": 5.1836349212345585, "grad_norm": 1.2598743438720703, "learning_rate": 9.636810094065204e-06, "loss": 0.0103, "step": 137216 }, { "epoch": 5.193305881908504, "grad_norm": 1.915220022201538, "learning_rate": 9.617468172717314e-06, "loss": 0.0098, "step": 137472 }, { "epoch": 5.202976842582449, "grad_norm": 0.8882638812065125, "learning_rate": 9.598126251369424e-06, "loss": 0.0094, "step": 137728 }, { "epoch": 5.212647803256393, "grad_norm": 0.10290802270174026, "learning_rate": 9.578784330021533e-06, "loss": 0.0085, "step": 137984 }, { "epoch": 5.222318763930339, "grad_norm": 3.6905243396759033, "learning_rate": 9.559442408673644e-06, "loss": 0.0105, "step": 138240 }, { "epoch": 5.231989724604284, "grad_norm": 0.27331113815307617, "learning_rate": 9.540100487325753e-06, "loss": 0.0096, "step": 138496 }, { "epoch": 5.241660685278229, "grad_norm": 0.3453868627548218, "learning_rate": 9.520758565977863e-06, "loss": 0.0093, "step": 138752 }, { "epoch": 5.251331645952174, "grad_norm": 6.686705589294434, "learning_rate": 9.501416644629973e-06, "loss": 0.0103, "step": 139008 }, { "epoch": 5.2610026066261195, "grad_norm": 6.24531364440918, "learning_rate": 9.482074723282083e-06, "loss": 0.0089, "step": 139264 }, { "epoch": 5.270673567300064, "grad_norm": 5.73491907119751, "learning_rate": 9.462732801934192e-06, "loss": 0.0099, "step": 139520 }, { "epoch": 5.280344527974009, "grad_norm": 3.492011308670044, "learning_rate": 9.443390880586304e-06, "loss": 0.0078, "step": 139776 }, { "epoch": 5.2900154886479545, "grad_norm": 0.5170190930366516, "learning_rate": 9.424048959238412e-06, "loss": 0.0078, "step": 140032 }, { "epoch": 5.299686449321899, "grad_norm": 0.07176166027784348, "learning_rate": 9.404707037890522e-06, "loss": 0.008, "step": 140288 }, { "epoch": 5.309357409995845, "grad_norm": 1.4914041757583618, "learning_rate": 9.385440670922898e-06, "loss": 0.009, "step": 140544 }, { "epoch": 5.319028370669789, "grad_norm": 2.964844226837158, "learning_rate": 9.366098749575008e-06, "loss": 0.0085, "step": 140800 }, { "epoch": 5.328699331343735, "grad_norm": 0.2556954324245453, "learning_rate": 9.346756828227116e-06, "loss": 0.0091, "step": 141056 }, { "epoch": 5.33837029201768, "grad_norm": 3.6987621784210205, "learning_rate": 9.327414906879228e-06, "loss": 0.0079, "step": 141312 }, { "epoch": 5.348041252691624, "grad_norm": 4.68747615814209, "learning_rate": 9.308072985531337e-06, "loss": 0.0087, "step": 141568 }, { "epoch": 5.35771221336557, "grad_norm": 0.4118718206882477, "learning_rate": 9.288731064183447e-06, "loss": 0.0096, "step": 141824 }, { "epoch": 5.367383174039515, "grad_norm": 0.3415529429912567, "learning_rate": 9.269389142835557e-06, "loss": 0.0088, "step": 142080 }, { "epoch": 5.37705413471346, "grad_norm": 0.10233204811811447, "learning_rate": 9.250047221487667e-06, "loss": 0.0082, "step": 142336 }, { "epoch": 5.386725095387405, "grad_norm": 0.2785378396511078, "learning_rate": 9.230705300139775e-06, "loss": 0.0107, "step": 142592 }, { "epoch": 5.39639605606135, "grad_norm": 1.3955039978027344, "learning_rate": 9.211438933172151e-06, "loss": 0.0081, "step": 142848 }, { "epoch": 5.406067016735295, "grad_norm": 6.009440898895264, "learning_rate": 9.192097011824261e-06, "loss": 0.0078, "step": 143104 }, { "epoch": 5.415737977409241, "grad_norm": 0.10272625833749771, "learning_rate": 9.172755090476371e-06, "loss": 0.0097, "step": 143360 }, { "epoch": 5.425408938083185, "grad_norm": 5.104133605957031, "learning_rate": 9.153413169128481e-06, "loss": 0.0079, "step": 143616 }, { "epoch": 5.43507989875713, "grad_norm": 0.218174010515213, "learning_rate": 9.13407124778059e-06, "loss": 0.0088, "step": 143872 }, { "epoch": 5.444750859431076, "grad_norm": 4.509812355041504, "learning_rate": 9.1147293264327e-06, "loss": 0.0088, "step": 144128 }, { "epoch": 5.45442182010502, "grad_norm": 0.9697214961051941, "learning_rate": 9.09538740508481e-06, "loss": 0.0088, "step": 144384 }, { "epoch": 5.464092780778966, "grad_norm": 1.000596046447754, "learning_rate": 9.07604548373692e-06, "loss": 0.0089, "step": 144640 }, { "epoch": 5.473763741452911, "grad_norm": 0.22552327811717987, "learning_rate": 9.05670356238903e-06, "loss": 0.009, "step": 144896 }, { "epoch": 5.483434702126856, "grad_norm": 0.7269870042800903, "learning_rate": 9.03736164104114e-06, "loss": 0.0097, "step": 145152 }, { "epoch": 5.493105662800801, "grad_norm": 0.3465439975261688, "learning_rate": 9.018095274073515e-06, "loss": 0.0102, "step": 145408 }, { "epoch": 5.502776623474746, "grad_norm": 0.6706210970878601, "learning_rate": 8.998753352725625e-06, "loss": 0.009, "step": 145664 }, { "epoch": 5.512447584148691, "grad_norm": 36.635414123535156, "learning_rate": 8.979411431377735e-06, "loss": 0.0084, "step": 145920 }, { "epoch": 5.522118544822636, "grad_norm": 1.6504905223846436, "learning_rate": 8.960069510029845e-06, "loss": 0.008, "step": 146176 }, { "epoch": 5.531789505496581, "grad_norm": 0.5836831331253052, "learning_rate": 8.940727588681955e-06, "loss": 0.0087, "step": 146432 }, { "epoch": 5.541460466170526, "grad_norm": 0.684615433216095, "learning_rate": 8.921385667334065e-06, "loss": 0.0076, "step": 146688 }, { "epoch": 5.551131426844472, "grad_norm": 2.892833709716797, "learning_rate": 8.902043745986174e-06, "loss": 0.0085, "step": 146944 }, { "epoch": 5.560802387518416, "grad_norm": 20.16980743408203, "learning_rate": 8.882701824638284e-06, "loss": 0.0084, "step": 147200 }, { "epoch": 5.570473348192362, "grad_norm": 3.590690851211548, "learning_rate": 8.86343545767066e-06, "loss": 0.008, "step": 147456 }, { "epoch": 5.5801443088663065, "grad_norm": 0.8185898661613464, "learning_rate": 8.844093536322768e-06, "loss": 0.0074, "step": 147712 }, { "epoch": 5.589815269540251, "grad_norm": 0.3533550202846527, "learning_rate": 8.82475161497488e-06, "loss": 0.0079, "step": 147968 }, { "epoch": 5.599486230214197, "grad_norm": 1.9578065872192383, "learning_rate": 8.805409693626988e-06, "loss": 0.0077, "step": 148224 }, { "epoch": 5.6091571908881415, "grad_norm": 0.17547504603862762, "learning_rate": 8.786067772279098e-06, "loss": 0.0086, "step": 148480 }, { "epoch": 5.618828151562087, "grad_norm": 1.6071051359176636, "learning_rate": 8.766725850931208e-06, "loss": 0.0085, "step": 148736 }, { "epoch": 5.628499112236032, "grad_norm": 0.8156256079673767, "learning_rate": 8.747383929583318e-06, "loss": 0.007, "step": 148992 }, { "epoch": 5.638170072909977, "grad_norm": 11.689854621887207, "learning_rate": 8.728042008235429e-06, "loss": 0.0096, "step": 149248 }, { "epoch": 5.647841033583922, "grad_norm": 0.9380402565002441, "learning_rate": 8.708700086887539e-06, "loss": 0.0082, "step": 149504 }, { "epoch": 5.657511994257867, "grad_norm": 0.5615717172622681, "learning_rate": 8.689433719919913e-06, "loss": 0.0079, "step": 149760 }, { "epoch": 5.667182954931812, "grad_norm": 1.7645057439804077, "learning_rate": 8.670091798572023e-06, "loss": 0.0103, "step": 150016 }, { "epoch": 5.676853915605757, "grad_norm": 4.009266376495361, "learning_rate": 8.650749877224133e-06, "loss": 0.0089, "step": 150272 }, { "epoch": 5.6865248762797025, "grad_norm": 0.6033828854560852, "learning_rate": 8.631407955876243e-06, "loss": 0.0074, "step": 150528 }, { "epoch": 5.696195836953647, "grad_norm": 5.695953369140625, "learning_rate": 8.612066034528353e-06, "loss": 0.0087, "step": 150784 }, { "epoch": 5.705866797627593, "grad_norm": 2.7361793518066406, "learning_rate": 8.592724113180463e-06, "loss": 0.0083, "step": 151040 }, { "epoch": 5.715537758301537, "grad_norm": 0.26094111800193787, "learning_rate": 8.573382191832572e-06, "loss": 0.0083, "step": 151296 }, { "epoch": 5.725208718975482, "grad_norm": 0.7956266403198242, "learning_rate": 8.554040270484682e-06, "loss": 0.0098, "step": 151552 }, { "epoch": 5.734879679649428, "grad_norm": 0.28114378452301025, "learning_rate": 8.534698349136792e-06, "loss": 0.0093, "step": 151808 }, { "epoch": 5.744550640323372, "grad_norm": 2.0160458087921143, "learning_rate": 8.515356427788902e-06, "loss": 0.0099, "step": 152064 }, { "epoch": 5.754221600997318, "grad_norm": 1.1137139797210693, "learning_rate": 8.496014506441012e-06, "loss": 0.0082, "step": 152320 }, { "epoch": 5.763892561671263, "grad_norm": 3.6147210597991943, "learning_rate": 8.476672585093122e-06, "loss": 0.0083, "step": 152576 }, { "epoch": 5.773563522345208, "grad_norm": 4.814803123474121, "learning_rate": 8.457330663745231e-06, "loss": 0.007, "step": 152832 }, { "epoch": 5.783234483019153, "grad_norm": 0.7147836089134216, "learning_rate": 8.437988742397341e-06, "loss": 0.0088, "step": 153088 }, { "epoch": 5.7929054436930985, "grad_norm": 1.4864364862442017, "learning_rate": 8.418646821049451e-06, "loss": 0.0063, "step": 153344 }, { "epoch": 5.802576404367043, "grad_norm": 0.7108877301216125, "learning_rate": 8.399304899701561e-06, "loss": 0.0076, "step": 153600 }, { "epoch": 5.812247365040989, "grad_norm": 2.6084437370300293, "learning_rate": 8.379962978353671e-06, "loss": 0.0086, "step": 153856 }, { "epoch": 5.821918325714933, "grad_norm": 0.09771878272294998, "learning_rate": 8.360696611386045e-06, "loss": 0.009, "step": 154112 }, { "epoch": 5.831589286388878, "grad_norm": 0.6635453104972839, "learning_rate": 8.341354690038155e-06, "loss": 0.0071, "step": 154368 }, { "epoch": 5.841260247062824, "grad_norm": 1.141113042831421, "learning_rate": 8.322088323070531e-06, "loss": 0.0076, "step": 154624 }, { "epoch": 5.850931207736768, "grad_norm": 0.8303898572921753, "learning_rate": 8.30274640172264e-06, "loss": 0.0096, "step": 154880 }, { "epoch": 5.860602168410714, "grad_norm": 0.5006212592124939, "learning_rate": 8.28340448037475e-06, "loss": 0.0076, "step": 155136 }, { "epoch": 5.870273129084659, "grad_norm": 1.09455406665802, "learning_rate": 8.26406255902686e-06, "loss": 0.007, "step": 155392 }, { "epoch": 5.879944089758604, "grad_norm": 0.5454290509223938, "learning_rate": 8.24472063767897e-06, "loss": 0.0081, "step": 155648 }, { "epoch": 5.889615050432549, "grad_norm": 14.345696449279785, "learning_rate": 8.22537871633108e-06, "loss": 0.0097, "step": 155904 }, { "epoch": 5.8992860111064935, "grad_norm": 2.6186184883117676, "learning_rate": 8.20603679498319e-06, "loss": 0.0089, "step": 156160 }, { "epoch": 5.908956971780439, "grad_norm": 1.894392967224121, "learning_rate": 8.186694873635299e-06, "loss": 0.0078, "step": 156416 }, { "epoch": 5.918627932454384, "grad_norm": 0.8504657745361328, "learning_rate": 8.167352952287409e-06, "loss": 0.0089, "step": 156672 }, { "epoch": 5.928298893128329, "grad_norm": 1.3975979089736938, "learning_rate": 8.148011030939519e-06, "loss": 0.0076, "step": 156928 }, { "epoch": 5.937969853802274, "grad_norm": 0.12042956799268723, "learning_rate": 8.128669109591629e-06, "loss": 0.0073, "step": 157184 }, { "epoch": 5.94764081447622, "grad_norm": 0.7101040482521057, "learning_rate": 8.10932718824374e-06, "loss": 0.0084, "step": 157440 }, { "epoch": 5.957311775150164, "grad_norm": 0.049012139439582825, "learning_rate": 8.08998526689585e-06, "loss": 0.0078, "step": 157696 }, { "epoch": 5.966982735824109, "grad_norm": 3.6799347400665283, "learning_rate": 8.070643345547958e-06, "loss": 0.0069, "step": 157952 }, { "epoch": 5.976653696498055, "grad_norm": 5.190356731414795, "learning_rate": 8.051301424200068e-06, "loss": 0.0083, "step": 158208 }, { "epoch": 5.986324657171999, "grad_norm": 0.405319482088089, "learning_rate": 8.031959502852178e-06, "loss": 0.0076, "step": 158464 }, { "epoch": 5.995995617845945, "grad_norm": 7.220467567443848, "learning_rate": 8.012617581504288e-06, "loss": 0.0076, "step": 158720 }, { "epoch": 6.0, "eval_f1_B-DISEASE": 0.8137172062733137, "eval_f1_B-MEDICATION": 0.9366835994194485, "eval_f1_B-PROCEDURE": 0.8338765045843654, "eval_f1_B-SYMPTOM": 0.802911592516026, "eval_f1_I-DISEASE": 0.8454658855143709, "eval_f1_I-MEDICATION": 0.9528172976448839, "eval_f1_I-PROCEDURE": 0.8516517120332436, "eval_f1_I-SYMPTOM": 0.8137056001030496, "eval_f1_O": 0.9249487903086626, "eval_f1_macro": 0.8639753542663738, "eval_f1_micro": 0.8895774266140277, "eval_loss": 0.4695410430431366, "eval_precision_B-DISEASE": 0.846968981938307, "eval_precision_B-MEDICATION": 0.9552266419981499, "eval_precision_B-PROCEDURE": 0.8472406181015453, "eval_precision_B-SYMPTOM": 0.8153335920685884, "eval_precision_I-DISEASE": 0.8649620429239743, "eval_precision_I-MEDICATION": 0.9655191138140747, "eval_precision_I-PROCEDURE": 0.8750075317409081, "eval_precision_I-SYMPTOM": 0.8128417937335135, "eval_precision_O": 0.9181102771519474, "eval_precision_macro": 0.8779122881634454, "eval_precision_micro": 0.8910716505650682, "eval_rauc_macro": 0.9145927366341076, "eval_rauc_micro": 0.9371255777552341, "eval_recall_B-DISEASE": 0.7829777107955898, "eval_recall_B-MEDICATION": 0.9188467698878804, "eval_recall_B-PROCEDURE": 0.820927446954141, "eval_recall_B-SYMPTOM": 0.7908624229979466, "eval_recall_I-DISEASE": 0.8268292376974121, "eval_recall_I-MEDICATION": 0.9404453377056117, "eval_recall_I-PROCEDURE": 0.8295103103299143, "eval_recall_I-SYMPTOM": 0.8145712443584784, "eval_recall_O": 0.9318899408213067, "eval_recall_macro": 0.8507622690609201, "eval_recall_micro": 0.8880882055547169, "eval_roc_auc_B-DISEASE": 0.8901727076261692, "eval_roc_auc_B-MEDICATION": 0.9593353648433499, "eval_roc_auc_B-PROCEDURE": 0.9091887857316173, "eval_roc_auc_B-SYMPTOM": 0.8934942497238074, "eval_roc_auc_I-DISEASE": 0.9049857727909453, "eval_roc_auc_I-MEDICATION": 0.9699894527029364, "eval_roc_auc_I-PROCEDURE": 0.9089825820626593, "eval_roc_auc_I-SYMPTOM": 0.8926697963231113, "eval_roc_auc_O": 0.9025159179023717, "eval_runtime": 60.8686, "eval_samples_per_second": 178.631, "eval_steps_per_second": 22.343, "step": 158826 }, { "epoch": 6.0056665785198895, "grad_norm": 2.5522048473358154, "learning_rate": 7.993275660156398e-06, "loss": 0.0067, "step": 158976 }, { "epoch": 6.015337539193835, "grad_norm": 0.1445242464542389, "learning_rate": 7.973933738808508e-06, "loss": 0.0056, "step": 159232 }, { "epoch": 6.02500849986778, "grad_norm": 0.8560025095939636, "learning_rate": 7.954591817460619e-06, "loss": 0.004, "step": 159488 }, { "epoch": 6.034679460541725, "grad_norm": 0.5683347582817078, "learning_rate": 7.935249896112727e-06, "loss": 0.0048, "step": 159744 }, { "epoch": 6.04435042121567, "grad_norm": 0.10347987711429596, "learning_rate": 7.916059083525368e-06, "loss": 0.0048, "step": 160000 }, { "epoch": 6.054021381889615, "grad_norm": 0.765963613986969, "learning_rate": 7.896717162177477e-06, "loss": 0.0044, "step": 160256 }, { "epoch": 6.06369234256356, "grad_norm": 0.23950816690921783, "learning_rate": 7.877375240829588e-06, "loss": 0.0039, "step": 160512 }, { "epoch": 6.073363303237505, "grad_norm": 1.28322434425354, "learning_rate": 7.858033319481697e-06, "loss": 0.0058, "step": 160768 }, { "epoch": 6.0830342639114505, "grad_norm": 0.0957738608121872, "learning_rate": 7.838691398133807e-06, "loss": 0.0051, "step": 161024 }, { "epoch": 6.092705224585395, "grad_norm": 0.09481767565011978, "learning_rate": 7.819349476785917e-06, "loss": 0.006, "step": 161280 }, { "epoch": 6.102376185259341, "grad_norm": 1.084963321685791, "learning_rate": 7.800007555438027e-06, "loss": 0.0052, "step": 161536 }, { "epoch": 6.1120471459332855, "grad_norm": 0.19962504506111145, "learning_rate": 7.780665634090136e-06, "loss": 0.0063, "step": 161792 }, { "epoch": 6.12171810660723, "grad_norm": 0.44316428899765015, "learning_rate": 7.761323712742248e-06, "loss": 0.0055, "step": 162048 }, { "epoch": 6.131389067281176, "grad_norm": 0.096375972032547, "learning_rate": 7.741981791394356e-06, "loss": 0.0046, "step": 162304 }, { "epoch": 6.14106002795512, "grad_norm": 0.06923657655715942, "learning_rate": 7.722639870046466e-06, "loss": 0.0065, "step": 162560 }, { "epoch": 6.150730988629066, "grad_norm": 0.05641581490635872, "learning_rate": 7.703373503078842e-06, "loss": 0.0053, "step": 162816 }, { "epoch": 6.160401949303011, "grad_norm": 0.10804527252912521, "learning_rate": 7.684031581730952e-06, "loss": 0.0049, "step": 163072 }, { "epoch": 6.170072909976956, "grad_norm": 0.3508886694908142, "learning_rate": 7.664689660383062e-06, "loss": 0.0057, "step": 163328 }, { "epoch": 6.179743870650901, "grad_norm": 0.19692179560661316, "learning_rate": 7.645347739035172e-06, "loss": 0.0051, "step": 163584 }, { "epoch": 6.1894148313248465, "grad_norm": 7.462360382080078, "learning_rate": 7.6260058176872806e-06, "loss": 0.0052, "step": 163840 }, { "epoch": 6.199085791998791, "grad_norm": 0.0778375118970871, "learning_rate": 7.6066638963393915e-06, "loss": 0.0057, "step": 164096 }, { "epoch": 6.208756752672736, "grad_norm": 3.7269153594970703, "learning_rate": 7.587321974991501e-06, "loss": 0.0066, "step": 164352 }, { "epoch": 6.218427713346681, "grad_norm": 0.3314465582370758, "learning_rate": 7.567980053643611e-06, "loss": 0.0066, "step": 164608 }, { "epoch": 6.228098674020626, "grad_norm": 0.4546041786670685, "learning_rate": 7.548713686675986e-06, "loss": 0.0054, "step": 164864 }, { "epoch": 6.237769634694572, "grad_norm": 0.1140669584274292, "learning_rate": 7.529371765328095e-06, "loss": 0.0053, "step": 165120 }, { "epoch": 6.247440595368516, "grad_norm": 0.03010609745979309, "learning_rate": 7.510029843980205e-06, "loss": 0.0045, "step": 165376 }, { "epoch": 6.257111556042462, "grad_norm": 0.015513704158365726, "learning_rate": 7.4906879226323145e-06, "loss": 0.0056, "step": 165632 }, { "epoch": 6.266782516716407, "grad_norm": 0.09134875982999802, "learning_rate": 7.4713460012844255e-06, "loss": 0.0066, "step": 165888 }, { "epoch": 6.276453477390351, "grad_norm": 0.016765909269452095, "learning_rate": 7.452004079936535e-06, "loss": 0.0053, "step": 166144 }, { "epoch": 6.286124438064297, "grad_norm": 0.5640433430671692, "learning_rate": 7.432662158588645e-06, "loss": 0.0049, "step": 166400 }, { "epoch": 6.295795398738242, "grad_norm": 0.21902374923229218, "learning_rate": 7.413320237240754e-06, "loss": 0.0064, "step": 166656 }, { "epoch": 6.305466359412187, "grad_norm": 5.544469833374023, "learning_rate": 7.393978315892864e-06, "loss": 0.0052, "step": 166912 }, { "epoch": 6.315137320086132, "grad_norm": 0.7227392792701721, "learning_rate": 7.3746363945449736e-06, "loss": 0.0054, "step": 167168 }, { "epoch": 6.324808280760077, "grad_norm": 5.256189346313477, "learning_rate": 7.3552944731970845e-06, "loss": 0.0057, "step": 167424 }, { "epoch": 6.334479241434022, "grad_norm": 4.659141540527344, "learning_rate": 7.335952551849194e-06, "loss": 0.0048, "step": 167680 }, { "epoch": 6.344150202107967, "grad_norm": 0.9072468876838684, "learning_rate": 7.316610630501304e-06, "loss": 0.0046, "step": 167936 }, { "epoch": 6.353821162781912, "grad_norm": 0.14295679330825806, "learning_rate": 7.297268709153413e-06, "loss": 0.0052, "step": 168192 }, { "epoch": 6.363492123455857, "grad_norm": 0.07267609983682632, "learning_rate": 7.277926787805523e-06, "loss": 0.0044, "step": 168448 }, { "epoch": 6.373163084129803, "grad_norm": 0.5884820818901062, "learning_rate": 7.258584866457633e-06, "loss": 0.0059, "step": 168704 }, { "epoch": 6.382834044803747, "grad_norm": 0.9420909881591797, "learning_rate": 7.239318499490009e-06, "loss": 0.0063, "step": 168960 }, { "epoch": 6.392505005477693, "grad_norm": 0.13994884490966797, "learning_rate": 7.2199765781421184e-06, "loss": 0.0047, "step": 169216 }, { "epoch": 6.4021759661516375, "grad_norm": 0.12359564751386642, "learning_rate": 7.2006346567942286e-06, "loss": 0.0052, "step": 169472 }, { "epoch": 6.411846926825583, "grad_norm": 2.2115604877471924, "learning_rate": 7.181292735446338e-06, "loss": 0.0057, "step": 169728 }, { "epoch": 6.421517887499528, "grad_norm": 0.019372638314962387, "learning_rate": 7.162101922858978e-06, "loss": 0.0058, "step": 169984 }, { "epoch": 6.4311888481734725, "grad_norm": 0.07033487409353256, "learning_rate": 7.142760001511088e-06, "loss": 0.007, "step": 170240 }, { "epoch": 6.440859808847418, "grad_norm": 0.0495685413479805, "learning_rate": 7.123418080163198e-06, "loss": 0.004, "step": 170496 }, { "epoch": 6.450530769521363, "grad_norm": 5.647730350494385, "learning_rate": 7.104076158815309e-06, "loss": 0.0056, "step": 170752 }, { "epoch": 6.460201730195308, "grad_norm": 0.04144500568509102, "learning_rate": 7.084734237467418e-06, "loss": 0.0045, "step": 171008 }, { "epoch": 6.469872690869253, "grad_norm": 0.04790091514587402, "learning_rate": 7.065392316119528e-06, "loss": 0.005, "step": 171264 }, { "epoch": 6.479543651543199, "grad_norm": 0.5807636976242065, "learning_rate": 7.046050394771637e-06, "loss": 0.006, "step": 171520 }, { "epoch": 6.489214612217143, "grad_norm": 0.04957037419080734, "learning_rate": 7.026708473423747e-06, "loss": 0.0055, "step": 171776 }, { "epoch": 6.498885572891089, "grad_norm": 0.29678839445114136, "learning_rate": 7.0073665520758576e-06, "loss": 0.0055, "step": 172032 }, { "epoch": 6.5085565335650335, "grad_norm": 0.12547393143177032, "learning_rate": 6.988024630727968e-06, "loss": 0.004, "step": 172288 }, { "epoch": 6.518227494238978, "grad_norm": 0.032738201320171356, "learning_rate": 6.9687582637603425e-06, "loss": 0.0062, "step": 172544 }, { "epoch": 6.527898454912924, "grad_norm": 0.013363759964704514, "learning_rate": 6.949416342412452e-06, "loss": 0.0053, "step": 172800 }, { "epoch": 6.537569415586868, "grad_norm": 0.12095487862825394, "learning_rate": 6.930074421064562e-06, "loss": 0.0059, "step": 173056 }, { "epoch": 6.547240376260814, "grad_norm": 1.4809820652008057, "learning_rate": 6.910732499716671e-06, "loss": 0.0045, "step": 173312 }, { "epoch": 6.556911336934759, "grad_norm": 0.11994415521621704, "learning_rate": 6.891390578368782e-06, "loss": 0.0045, "step": 173568 }, { "epoch": 6.566582297608704, "grad_norm": 0.5074435472488403, "learning_rate": 6.8720486570208915e-06, "loss": 0.0047, "step": 173824 }, { "epoch": 6.576253258282649, "grad_norm": 1.6332694292068481, "learning_rate": 6.852706735673002e-06, "loss": 0.0035, "step": 174080 }, { "epoch": 6.585924218956594, "grad_norm": 0.6540184020996094, "learning_rate": 6.833364814325111e-06, "loss": 0.005, "step": 174336 }, { "epoch": 6.595595179630539, "grad_norm": 4.0721588134765625, "learning_rate": 6.814022892977221e-06, "loss": 0.0043, "step": 174592 }, { "epoch": 6.605266140304484, "grad_norm": 0.34634193778038025, "learning_rate": 6.79468097162933e-06, "loss": 0.0063, "step": 174848 }, { "epoch": 6.6149371009784295, "grad_norm": 0.140211820602417, "learning_rate": 6.775339050281441e-06, "loss": 0.0036, "step": 175104 }, { "epoch": 6.624608061652374, "grad_norm": 1.9943935871124268, "learning_rate": 6.7559971289335505e-06, "loss": 0.0067, "step": 175360 }, { "epoch": 6.63427902232632, "grad_norm": 10.475574493408203, "learning_rate": 6.736655207585661e-06, "loss": 0.0054, "step": 175616 }, { "epoch": 6.643949983000264, "grad_norm": 12.580154418945312, "learning_rate": 6.71731328623777e-06, "loss": 0.0065, "step": 175872 }, { "epoch": 6.653620943674209, "grad_norm": 0.5796445608139038, "learning_rate": 6.69797136488988e-06, "loss": 0.0047, "step": 176128 }, { "epoch": 6.663291904348155, "grad_norm": 0.03411826118826866, "learning_rate": 6.678629443541989e-06, "loss": 0.0038, "step": 176384 }, { "epoch": 6.672962865022099, "grad_norm": 0.3479785621166229, "learning_rate": 6.6592875221941e-06, "loss": 0.0059, "step": 176640 }, { "epoch": 6.682633825696045, "grad_norm": 0.37940141558647156, "learning_rate": 6.63994560084621e-06, "loss": 0.0053, "step": 176896 }, { "epoch": 6.69230478636999, "grad_norm": 0.2881454527378082, "learning_rate": 6.62060367949832e-06, "loss": 0.0043, "step": 177152 }, { "epoch": 6.701975747043935, "grad_norm": 0.32211869955062866, "learning_rate": 6.601261758150429e-06, "loss": 0.0071, "step": 177408 }, { "epoch": 6.71164670771788, "grad_norm": 0.0830293819308281, "learning_rate": 6.581919836802539e-06, "loss": 0.0047, "step": 177664 }, { "epoch": 6.7213176683918245, "grad_norm": 0.6374879479408264, "learning_rate": 6.5625779154546484e-06, "loss": 0.005, "step": 177920 }, { "epoch": 6.73098862906577, "grad_norm": 0.42339634895324707, "learning_rate": 6.543235994106759e-06, "loss": 0.0057, "step": 178176 }, { "epoch": 6.740659589739715, "grad_norm": 0.21855546534061432, "learning_rate": 6.523894072758869e-06, "loss": 0.0044, "step": 178432 }, { "epoch": 6.75033055041366, "grad_norm": 0.17506131529808044, "learning_rate": 6.5046277057912435e-06, "loss": 0.0048, "step": 178688 }, { "epoch": 6.760001511087605, "grad_norm": 0.11796054244041443, "learning_rate": 6.485285784443354e-06, "loss": 0.0055, "step": 178944 }, { "epoch": 6.769672471761551, "grad_norm": 0.7898224592208862, "learning_rate": 6.465943863095463e-06, "loss": 0.0042, "step": 179200 }, { "epoch": 6.779343432435495, "grad_norm": 0.18970559537410736, "learning_rate": 6.446677496127839e-06, "loss": 0.0044, "step": 179456 }, { "epoch": 6.789014393109441, "grad_norm": 1.1956768035888672, "learning_rate": 6.427335574779948e-06, "loss": 0.0049, "step": 179712 }, { "epoch": 6.798685353783386, "grad_norm": 0.6470310688018799, "learning_rate": 6.407993653432059e-06, "loss": 0.005, "step": 179968 }, { "epoch": 6.808356314457331, "grad_norm": 4.411340236663818, "learning_rate": 6.388651732084168e-06, "loss": 0.0043, "step": 180224 }, { "epoch": 6.818027275131276, "grad_norm": 0.09347503632307053, "learning_rate": 6.369309810736278e-06, "loss": 0.0043, "step": 180480 }, { "epoch": 6.8276982358052205, "grad_norm": 0.007505136076360941, "learning_rate": 6.3499678893883875e-06, "loss": 0.0055, "step": 180736 }, { "epoch": 6.837369196479166, "grad_norm": 0.286432683467865, "learning_rate": 6.330625968040498e-06, "loss": 0.0039, "step": 180992 }, { "epoch": 6.847040157153111, "grad_norm": 0.2944409251213074, "learning_rate": 6.311284046692607e-06, "loss": 0.004, "step": 181248 }, { "epoch": 6.856711117827056, "grad_norm": 0.32838645577430725, "learning_rate": 6.291942125344718e-06, "loss": 0.0046, "step": 181504 }, { "epoch": 6.866382078501001, "grad_norm": 0.04531640559434891, "learning_rate": 6.272600203996827e-06, "loss": 0.0048, "step": 181760 }, { "epoch": 6.876053039174947, "grad_norm": 0.060415927320718765, "learning_rate": 6.253258282648937e-06, "loss": 0.0038, "step": 182016 }, { "epoch": 6.885723999848891, "grad_norm": 0.0581819973886013, "learning_rate": 6.233916361301047e-06, "loss": 0.004, "step": 182272 }, { "epoch": 6.895394960522836, "grad_norm": 0.0427870936691761, "learning_rate": 6.214574439953157e-06, "loss": 0.005, "step": 182528 }, { "epoch": 6.9050659211967815, "grad_norm": 1.7197208404541016, "learning_rate": 6.195232518605266e-06, "loss": 0.0043, "step": 182784 }, { "epoch": 6.914736881870726, "grad_norm": 0.09247897565364838, "learning_rate": 6.175890597257377e-06, "loss": 0.0056, "step": 183040 }, { "epoch": 6.924407842544672, "grad_norm": 0.019822193309664726, "learning_rate": 6.156548675909486e-06, "loss": 0.0055, "step": 183296 }, { "epoch": 6.9340788032186165, "grad_norm": 1.541544795036316, "learning_rate": 6.137282308941861e-06, "loss": 0.0057, "step": 183552 }, { "epoch": 6.943749763892562, "grad_norm": 1.1010373830795288, "learning_rate": 6.117940387593971e-06, "loss": 0.0048, "step": 183808 }, { "epoch": 6.953420724566507, "grad_norm": 0.32369253039360046, "learning_rate": 6.0985984662460805e-06, "loss": 0.0044, "step": 184064 }, { "epoch": 6.963091685240451, "grad_norm": 0.36643514037132263, "learning_rate": 6.079332099278456e-06, "loss": 0.0052, "step": 184320 }, { "epoch": 6.972762645914397, "grad_norm": 4.76845645904541, "learning_rate": 6.0599901779305655e-06, "loss": 0.0055, "step": 184576 }, { "epoch": 6.982433606588342, "grad_norm": 0.9248315095901489, "learning_rate": 6.0406482565826765e-06, "loss": 0.0057, "step": 184832 }, { "epoch": 6.992104567262287, "grad_norm": 7.52930212020874, "learning_rate": 6.021306335234786e-06, "loss": 0.0057, "step": 185088 }, { "epoch": 7.0, "eval_f1_B-DISEASE": 0.8170322475036831, "eval_f1_B-MEDICATION": 0.9388233174159835, "eval_f1_B-PROCEDURE": 0.839606661240293, "eval_f1_B-SYMPTOM": 0.8083601954871582, "eval_f1_I-DISEASE": 0.8508488239606706, "eval_f1_I-MEDICATION": 0.9480509127627348, "eval_f1_I-PROCEDURE": 0.8557830786803291, "eval_f1_I-SYMPTOM": 0.8172193160347818, "eval_f1_O": 0.9271939022229883, "eval_f1_macro": 0.8669909394787357, "eval_f1_micro": 0.8922796408524778, "eval_loss": 0.4217996597290039, "eval_precision_B-DISEASE": 0.8439005833121989, "eval_precision_B-MEDICATION": 0.9507299270072993, "eval_precision_B-PROCEDURE": 0.8642271636248359, "eval_precision_B-SYMPTOM": 0.8188329471244997, "eval_precision_I-DISEASE": 0.8612162292396158, "eval_precision_I-MEDICATION": 0.9526327031934209, "eval_precision_I-PROCEDURE": 0.8682078109649086, "eval_precision_I-SYMPTOM": 0.8107099237044831, "eval_precision_O": 0.9254912549627295, "eval_precision_macro": 0.8773276159037767, "eval_precision_micro": 0.8938970887232961, "eval_rauc_macro": 0.9183518246652276, "eval_rauc_micro": 0.9385967353145594, "eval_recall_B-DISEASE": 0.7918220036487665, "eval_recall_B-MEDICATION": 0.9272112475529454, "eval_recall_B-PROCEDURE": 0.8163501026694046, "eval_recall_B-SYMPTOM": 0.7981519507186858, "eval_recall_I-DISEASE": 0.8407280573012708, "eval_recall_I-MEDICATION": 0.943512984608875, "eval_recall_I-PROCEDURE": 0.8437089443234025, "eval_recall_I-SYMPTOM": 0.8238340855362132, "eval_recall_O": 0.9289028258275723, "eval_recall_macro": 0.8571358002430152, "eval_recall_micro": 0.8906680357402255, "eval_roc_auc_B-DISEASE": 0.8945483627101096, "eval_roc_auc_B-MEDICATION": 0.9635074195320126, "eval_roc_auc_B-PROCEDURE": 0.9070703509413875, "eval_roc_auc_B-SYMPTOM": 0.897166398981839, "eval_roc_auc_I-DISEASE": 0.9115174466724183, "eval_roc_auc_I-MEDICATION": 0.9714307242336749, "eval_roc_auc_I-PROCEDURE": 0.9156151956077059, "eval_roc_auc_I-SYMPTOM": 0.896927321744982, "eval_roc_auc_O": 0.9073832015629196, "eval_runtime": 61.0949, "eval_samples_per_second": 177.969, "eval_steps_per_second": 22.26, "step": 185297 }, { "epoch": 7.001775527936232, "grad_norm": 0.7092540860176086, "learning_rate": 6.001964413886896e-06, "loss": 0.0041, "step": 185344 }, { "epoch": 7.0114464886101775, "grad_norm": 0.18654197454452515, "learning_rate": 5.982622492539005e-06, "loss": 0.0024, "step": 185600 }, { "epoch": 7.021117449284122, "grad_norm": 0.8840870261192322, "learning_rate": 5.963280571191115e-06, "loss": 0.0023, "step": 185856 }, { "epoch": 7.030788409958068, "grad_norm": 0.08226889371871948, "learning_rate": 5.943938649843225e-06, "loss": 0.0041, "step": 186112 }, { "epoch": 7.040459370632012, "grad_norm": 0.1381804347038269, "learning_rate": 5.9245967284953355e-06, "loss": 0.0031, "step": 186368 }, { "epoch": 7.050130331305957, "grad_norm": 2.1430623531341553, "learning_rate": 5.905254807147445e-06, "loss": 0.0022, "step": 186624 }, { "epoch": 7.059801291979903, "grad_norm": 0.060144223272800446, "learning_rate": 5.885912885799555e-06, "loss": 0.0022, "step": 186880 }, { "epoch": 7.069472252653847, "grad_norm": 0.06146460771560669, "learning_rate": 5.866570964451664e-06, "loss": 0.0035, "step": 187136 }, { "epoch": 7.079143213327793, "grad_norm": 1.6876006126403809, "learning_rate": 5.847229043103775e-06, "loss": 0.0034, "step": 187392 }, { "epoch": 7.088814174001738, "grad_norm": 2.21935772895813, "learning_rate": 5.8278871217558845e-06, "loss": 0.0045, "step": 187648 }, { "epoch": 7.098485134675683, "grad_norm": 0.2139071524143219, "learning_rate": 5.808545200407995e-06, "loss": 0.0028, "step": 187904 }, { "epoch": 7.108156095349628, "grad_norm": 0.2530527710914612, "learning_rate": 5.789203279060104e-06, "loss": 0.0029, "step": 188160 }, { "epoch": 7.117827056023573, "grad_norm": 0.06350823491811752, "learning_rate": 5.769861357712214e-06, "loss": 0.0028, "step": 188416 }, { "epoch": 7.127498016697518, "grad_norm": 0.3266438841819763, "learning_rate": 5.750519436364323e-06, "loss": 0.0057, "step": 188672 }, { "epoch": 7.137168977371463, "grad_norm": 0.02727964147925377, "learning_rate": 5.731253069396698e-06, "loss": 0.0042, "step": 188928 }, { "epoch": 7.146839938045408, "grad_norm": 0.007432880811393261, "learning_rate": 5.711911148048809e-06, "loss": 0.0035, "step": 189184 }, { "epoch": 7.156510898719353, "grad_norm": 0.09012371301651001, "learning_rate": 5.692569226700918e-06, "loss": 0.0039, "step": 189440 }, { "epoch": 7.166181859393299, "grad_norm": 0.19539253413677216, "learning_rate": 5.6732273053530285e-06, "loss": 0.0039, "step": 189696 }, { "epoch": 7.175852820067243, "grad_norm": 3.682310104370117, "learning_rate": 5.653885384005138e-06, "loss": 0.0031, "step": 189952 }, { "epoch": 7.185523780741189, "grad_norm": 0.4140027165412903, "learning_rate": 5.634543462657248e-06, "loss": 0.0044, "step": 190208 }, { "epoch": 7.195194741415134, "grad_norm": 0.047425903379917145, "learning_rate": 5.615201541309357e-06, "loss": 0.0032, "step": 190464 }, { "epoch": 7.204865702089078, "grad_norm": 0.08099602907896042, "learning_rate": 5.595859619961468e-06, "loss": 0.0027, "step": 190720 }, { "epoch": 7.214536662763024, "grad_norm": 2.0335052013397217, "learning_rate": 5.5765176986135775e-06, "loss": 0.0038, "step": 190976 }, { "epoch": 7.2242076234369685, "grad_norm": 0.7074165940284729, "learning_rate": 5.557251331645953e-06, "loss": 0.0033, "step": 191232 }, { "epoch": 7.233878584110914, "grad_norm": 0.00729184877127409, "learning_rate": 5.537909410298062e-06, "loss": 0.0033, "step": 191488 }, { "epoch": 7.243549544784859, "grad_norm": 0.2962284982204437, "learning_rate": 5.518643043330437e-06, "loss": 0.0033, "step": 191744 }, { "epoch": 7.253220505458804, "grad_norm": 0.03666882589459419, "learning_rate": 5.499301121982547e-06, "loss": 0.0032, "step": 192000 }, { "epoch": 7.262891466132749, "grad_norm": 2.278210163116455, "learning_rate": 5.479959200634657e-06, "loss": 0.0026, "step": 192256 }, { "epoch": 7.272562426806694, "grad_norm": 0.6450464129447937, "learning_rate": 5.460617279286768e-06, "loss": 0.0031, "step": 192512 }, { "epoch": 7.282233387480639, "grad_norm": 0.011020343750715256, "learning_rate": 5.441275357938877e-06, "loss": 0.0026, "step": 192768 }, { "epoch": 7.291904348154584, "grad_norm": 0.848145604133606, "learning_rate": 5.421933436590987e-06, "loss": 0.0031, "step": 193024 }, { "epoch": 7.30157530882853, "grad_norm": 0.47598353028297424, "learning_rate": 5.402591515243096e-06, "loss": 0.0035, "step": 193280 }, { "epoch": 7.311246269502474, "grad_norm": 0.08850053697824478, "learning_rate": 5.3832495938952064e-06, "loss": 0.0028, "step": 193536 }, { "epoch": 7.32091723017642, "grad_norm": 0.14100511372089386, "learning_rate": 5.363907672547316e-06, "loss": 0.0028, "step": 193792 }, { "epoch": 7.3305881908503645, "grad_norm": 0.0643409714102745, "learning_rate": 5.344565751199427e-06, "loss": 0.0034, "step": 194048 }, { "epoch": 7.340259151524309, "grad_norm": 0.02023415081202984, "learning_rate": 5.325223829851536e-06, "loss": 0.0034, "step": 194304 }, { "epoch": 7.349930112198255, "grad_norm": 0.2690616846084595, "learning_rate": 5.305881908503646e-06, "loss": 0.0036, "step": 194560 }, { "epoch": 7.3596010728721994, "grad_norm": 1.2020032405853271, "learning_rate": 5.286539987155755e-06, "loss": 0.0021, "step": 194816 }, { "epoch": 7.369272033546145, "grad_norm": 0.49315980076789856, "learning_rate": 5.2671980658078655e-06, "loss": 0.0033, "step": 195072 }, { "epoch": 7.37894299422009, "grad_norm": 0.03424458205699921, "learning_rate": 5.247856144459975e-06, "loss": 0.0034, "step": 195328 }, { "epoch": 7.388613954894035, "grad_norm": 0.5558503270149231, "learning_rate": 5.228514223112086e-06, "loss": 0.0039, "step": 195584 }, { "epoch": 7.39828491556798, "grad_norm": 0.1527431756258011, "learning_rate": 5.209172301764195e-06, "loss": 0.0025, "step": 195840 }, { "epoch": 7.4079558762419255, "grad_norm": 0.03023804910480976, "learning_rate": 5.189830380416305e-06, "loss": 0.0029, "step": 196096 }, { "epoch": 7.41762683691587, "grad_norm": 0.006813144311308861, "learning_rate": 5.1704884590684145e-06, "loss": 0.002, "step": 196352 }, { "epoch": 7.427297797589815, "grad_norm": 0.06950301676988602, "learning_rate": 5.151146537720525e-06, "loss": 0.0035, "step": 196608 }, { "epoch": 7.4369687582637605, "grad_norm": 0.12590453028678894, "learning_rate": 5.131804616372634e-06, "loss": 0.0027, "step": 196864 }, { "epoch": 7.446639718937705, "grad_norm": 1.1601083278656006, "learning_rate": 5.112462695024745e-06, "loss": 0.0025, "step": 197120 }, { "epoch": 7.456310679611651, "grad_norm": 1.2641323804855347, "learning_rate": 5.093120773676854e-06, "loss": 0.0031, "step": 197376 }, { "epoch": 7.465981640285595, "grad_norm": 0.9254095554351807, "learning_rate": 5.073778852328964e-06, "loss": 0.003, "step": 197632 }, { "epoch": 7.475652600959541, "grad_norm": 0.05598944053053856, "learning_rate": 5.0544369309810736e-06, "loss": 0.003, "step": 197888 }, { "epoch": 7.485323561633486, "grad_norm": 0.040579646825790405, "learning_rate": 5.035170564013449e-06, "loss": 0.0028, "step": 198144 }, { "epoch": 7.494994522307431, "grad_norm": 8.562992095947266, "learning_rate": 5.0158286426655585e-06, "loss": 0.0043, "step": 198400 }, { "epoch": 7.504665482981376, "grad_norm": 0.09161358326673508, "learning_rate": 4.996562275697933e-06, "loss": 0.0028, "step": 198656 }, { "epoch": 7.514336443655321, "grad_norm": 0.06476159393787384, "learning_rate": 4.9772203543500435e-06, "loss": 0.0029, "step": 198912 }, { "epoch": 7.524007404329266, "grad_norm": 0.025177879258990288, "learning_rate": 4.957878433002154e-06, "loss": 0.0041, "step": 199168 }, { "epoch": 7.533678365003211, "grad_norm": 0.06914424151182175, "learning_rate": 4.938536511654263e-06, "loss": 0.0036, "step": 199424 }, { "epoch": 7.543349325677156, "grad_norm": 0.007711977697908878, "learning_rate": 4.919194590306373e-06, "loss": 0.0037, "step": 199680 }, { "epoch": 7.553020286351101, "grad_norm": 0.5741889476776123, "learning_rate": 4.899852668958483e-06, "loss": 0.0021, "step": 199936 }, { "epoch": 7.562691247025047, "grad_norm": 0.017532778903841972, "learning_rate": 4.880510747610593e-06, "loss": 0.0039, "step": 200192 }, { "epoch": 7.572362207698991, "grad_norm": 0.013444333337247372, "learning_rate": 4.8611688262627025e-06, "loss": 0.0032, "step": 200448 }, { "epoch": 7.582033168372936, "grad_norm": 0.1252066045999527, "learning_rate": 4.8418269049148135e-06, "loss": 0.0026, "step": 200704 }, { "epoch": 7.591704129046882, "grad_norm": 0.16216708719730377, "learning_rate": 4.822560537947188e-06, "loss": 0.0028, "step": 200960 }, { "epoch": 7.601375089720826, "grad_norm": 0.20117320120334625, "learning_rate": 4.803218616599298e-06, "loss": 0.0026, "step": 201216 }, { "epoch": 7.611046050394772, "grad_norm": 0.4220730662345886, "learning_rate": 4.783876695251408e-06, "loss": 0.0033, "step": 201472 }, { "epoch": 7.620717011068717, "grad_norm": 0.04324984550476074, "learning_rate": 4.764534773903518e-06, "loss": 0.0032, "step": 201728 }, { "epoch": 7.630387971742662, "grad_norm": 0.24287299811840057, "learning_rate": 4.745192852555627e-06, "loss": 0.0029, "step": 201984 }, { "epoch": 7.640058932416607, "grad_norm": 0.01797611638903618, "learning_rate": 4.725850931207737e-06, "loss": 0.0023, "step": 202240 }, { "epoch": 7.6497298930905515, "grad_norm": 0.027291102334856987, "learning_rate": 4.706509009859847e-06, "loss": 0.0037, "step": 202496 }, { "epoch": 7.659400853764497, "grad_norm": 0.015763144940137863, "learning_rate": 4.687167088511957e-06, "loss": 0.0024, "step": 202752 }, { "epoch": 7.669071814438442, "grad_norm": 2.068912982940674, "learning_rate": 4.6679007215443315e-06, "loss": 0.0027, "step": 203008 }, { "epoch": 7.678742775112387, "grad_norm": 0.1518033742904663, "learning_rate": 4.648558800196442e-06, "loss": 0.0025, "step": 203264 }, { "epoch": 7.688413735786332, "grad_norm": 0.7454547882080078, "learning_rate": 4.629216878848552e-06, "loss": 0.0027, "step": 203520 }, { "epoch": 7.698084696460278, "grad_norm": 0.04685609042644501, "learning_rate": 4.609874957500661e-06, "loss": 0.0034, "step": 203776 }, { "epoch": 7.707755657134222, "grad_norm": 0.012423527427017689, "learning_rate": 4.590533036152771e-06, "loss": 0.002, "step": 204032 }, { "epoch": 7.717426617808167, "grad_norm": 0.5419167876243591, "learning_rate": 4.571191114804881e-06, "loss": 0.0025, "step": 204288 }, { "epoch": 7.7270975784821125, "grad_norm": 0.3725039064884186, "learning_rate": 4.551849193456991e-06, "loss": 0.0028, "step": 204544 }, { "epoch": 7.736768539156057, "grad_norm": 0.09141060709953308, "learning_rate": 4.532507272109101e-06, "loss": 0.0024, "step": 204800 }, { "epoch": 7.746439499830003, "grad_norm": 0.13550949096679688, "learning_rate": 4.513165350761211e-06, "loss": 0.0025, "step": 205056 }, { "epoch": 7.7561104605039475, "grad_norm": 0.005226655397564173, "learning_rate": 4.49382342941332e-06, "loss": 0.0027, "step": 205312 }, { "epoch": 7.765781421177893, "grad_norm": 1.0764216184616089, "learning_rate": 4.47448150806543e-06, "loss": 0.0029, "step": 205568 }, { "epoch": 7.775452381851838, "grad_norm": 0.019114414229989052, "learning_rate": 4.45513958671754e-06, "loss": 0.0023, "step": 205824 }, { "epoch": 7.785123342525783, "grad_norm": 0.214304581284523, "learning_rate": 4.43579766536965e-06, "loss": 0.0028, "step": 206080 }, { "epoch": 7.794794303199728, "grad_norm": 0.9313531517982483, "learning_rate": 4.41645574402176e-06, "loss": 0.0027, "step": 206336 }, { "epoch": 7.804465263873674, "grad_norm": 0.04551494121551514, "learning_rate": 4.3971893770541355e-06, "loss": 0.0025, "step": 206592 }, { "epoch": 7.814136224547618, "grad_norm": 0.25390639901161194, "learning_rate": 4.377847455706245e-06, "loss": 0.0028, "step": 206848 }, { "epoch": 7.823807185221563, "grad_norm": 0.4188549816608429, "learning_rate": 4.358505534358355e-06, "loss": 0.0035, "step": 207104 }, { "epoch": 7.8334781458955085, "grad_norm": 5.584123134613037, "learning_rate": 4.339163613010465e-06, "loss": 0.0029, "step": 207360 }, { "epoch": 7.843149106569453, "grad_norm": 0.09376902878284454, "learning_rate": 4.319821691662574e-06, "loss": 0.0025, "step": 207616 }, { "epoch": 7.852820067243399, "grad_norm": 0.007881022058427334, "learning_rate": 4.3004797703146844e-06, "loss": 0.0031, "step": 207872 }, { "epoch": 7.8624910279173434, "grad_norm": 0.053988266736269, "learning_rate": 4.2811378489667946e-06, "loss": 0.0031, "step": 208128 }, { "epoch": 7.872161988591289, "grad_norm": 0.37332120537757874, "learning_rate": 4.261795927618904e-06, "loss": 0.0021, "step": 208384 }, { "epoch": 7.881832949265234, "grad_norm": 0.10056313127279282, "learning_rate": 4.242454006271014e-06, "loss": 0.003, "step": 208640 }, { "epoch": 7.891503909939178, "grad_norm": 0.021642550826072693, "learning_rate": 4.223187639303389e-06, "loss": 0.003, "step": 208896 }, { "epoch": 7.901174870613124, "grad_norm": 0.15152160823345184, "learning_rate": 4.203845717955499e-06, "loss": 0.0028, "step": 209152 }, { "epoch": 7.910845831287069, "grad_norm": 0.05597339943051338, "learning_rate": 4.184503796607608e-06, "loss": 0.0026, "step": 209408 }, { "epoch": 7.920516791961014, "grad_norm": 0.026413604617118835, "learning_rate": 4.165161875259718e-06, "loss": 0.0027, "step": 209664 }, { "epoch": 7.930187752634959, "grad_norm": 0.086790531873703, "learning_rate": 4.1458199539118285e-06, "loss": 0.0018, "step": 209920 }, { "epoch": 7.9398587133089045, "grad_norm": 0.40138956904411316, "learning_rate": 4.126478032563938e-06, "loss": 0.0026, "step": 210176 }, { "epoch": 7.949529673982849, "grad_norm": 0.504784107208252, "learning_rate": 4.107136111216048e-06, "loss": 0.003, "step": 210432 }, { "epoch": 7.959200634656794, "grad_norm": 0.5855137705802917, "learning_rate": 4.087794189868158e-06, "loss": 0.0038, "step": 210688 }, { "epoch": 7.968871595330739, "grad_norm": 0.23986396193504333, "learning_rate": 4.068452268520267e-06, "loss": 0.0027, "step": 210944 }, { "epoch": 7.978542556004684, "grad_norm": 0.08508482575416565, "learning_rate": 4.049110347172377e-06, "loss": 0.0039, "step": 211200 }, { "epoch": 7.98821351667863, "grad_norm": 2.018085241317749, "learning_rate": 4.0297684258244875e-06, "loss": 0.0018, "step": 211456 }, { "epoch": 7.997884477352574, "grad_norm": 0.023388510569930077, "learning_rate": 4.010502058856862e-06, "loss": 0.0022, "step": 211712 }, { "epoch": 8.0, "eval_f1_B-DISEASE": 0.8192092717409173, "eval_f1_B-MEDICATION": 0.9423820224719102, "eval_f1_B-PROCEDURE": 0.8420637009739406, "eval_f1_B-SYMPTOM": 0.8127123442808607, "eval_f1_I-DISEASE": 0.850955341029534, "eval_f1_I-MEDICATION": 0.949058663755226, "eval_f1_I-PROCEDURE": 0.8554017038172543, "eval_f1_I-SYMPTOM": 0.8136104891443594, "eval_f1_O": 0.9274238987873996, "eval_f1_macro": 0.868090826222378, "eval_f1_micro": 0.8928833447754752, "eval_loss": 0.4737370014190674, "eval_precision_B-DISEASE": 0.8500213219616205, "eval_precision_B-MEDICATION": 0.9520523065746458, "eval_precision_B-PROCEDURE": 0.8641274986493788, "eval_precision_B-SYMPTOM": 0.8414187307635937, "eval_precision_I-DISEASE": 0.8638646013066827, "eval_precision_I-MEDICATION": 0.9557009546283385, "eval_precision_I-PROCEDURE": 0.8787206030410202, "eval_precision_I-SYMPTOM": 0.8445731862439002, "eval_precision_O": 0.9156305475079966, "eval_precision_macro": 0.8851233056307976, "eval_precision_micro": 0.895332761029088, "eval_rauc_macro": 0.9153957194153303, "eval_rauc_micro": 0.9385898278409849, "eval_recall_B-DISEASE": 0.7905528674545887, "eval_recall_B-MEDICATION": 0.9329062110695854, "eval_recall_B-PROCEDURE": 0.8210985626283368, "eval_recall_B-SYMPTOM": 0.7859000684462697, "eval_recall_I-DISEASE": 0.8384262221051443, "eval_recall_I-MEDICATION": 0.942508065795737, "eval_recall_I-PROCEDURE": 0.8332884525936988, "eval_recall_I-SYMPTOM": 0.7848377390930582, "eval_recall_O": 0.9395250115419437, "eval_recall_macro": 0.8521159111920403, "eval_recall_micro": 0.8904472939909823, "eval_roc_auc_B-DISEASE": 0.8939787349011639, "eval_roc_auc_B-MEDICATION": 0.966357083606876, "eval_roc_auc_B-PROCEDURE": 0.9094372113385102, "eval_roc_auc_B-SYMPTOM": 0.8913483585328563, "eval_roc_auc_I-DISEASE": 0.9105856356600845, "eval_roc_auc_I-MEDICATION": 0.9709506682682745, "eval_roc_auc_I-PROCEDURE": 0.9110413996799644, "eval_roc_auc_I-SYMPTOM": 0.8811634945418189, "eval_roc_auc_O": 0.9036988882084248, "eval_runtime": 61.2016, "eval_samples_per_second": 177.659, "eval_steps_per_second": 22.222, "step": 211768 }, { "epoch": 8.00755543802652, "grad_norm": 0.04221045598387718, "learning_rate": 3.9911601375089725e-06, "loss": 0.0014, "step": 211968 }, { "epoch": 8.017226398700465, "grad_norm": 0.10489369928836823, "learning_rate": 3.971818216161083e-06, "loss": 0.0014, "step": 212224 }, { "epoch": 8.02689735937441, "grad_norm": 0.005116314627230167, "learning_rate": 3.952476294813192e-06, "loss": 0.0017, "step": 212480 }, { "epoch": 8.036568320048355, "grad_norm": 0.013480676338076591, "learning_rate": 3.933134373465302e-06, "loss": 0.0015, "step": 212736 }, { "epoch": 8.0462392807223, "grad_norm": 0.009527523070573807, "learning_rate": 3.913792452117412e-06, "loss": 0.0017, "step": 212992 }, { "epoch": 8.055910241396244, "grad_norm": 0.06975742429494858, "learning_rate": 3.8944505307695214e-06, "loss": 0.0018, "step": 213248 }, { "epoch": 8.06558120207019, "grad_norm": 1.0523467063903809, "learning_rate": 3.8751086094216316e-06, "loss": 0.0008, "step": 213504 }, { "epoch": 8.075252162744135, "grad_norm": 0.021572506055235863, "learning_rate": 3.855766688073742e-06, "loss": 0.0015, "step": 213760 }, { "epoch": 8.084923123418081, "grad_norm": 0.004556609317660332, "learning_rate": 3.8365003211061165e-06, "loss": 0.0025, "step": 214016 }, { "epoch": 8.094594084092025, "grad_norm": 0.01564151607453823, "learning_rate": 3.817158399758226e-06, "loss": 0.002, "step": 214272 }, { "epoch": 8.10426504476597, "grad_norm": 0.02460288256406784, "learning_rate": 3.797816478410336e-06, "loss": 0.0023, "step": 214528 }, { "epoch": 8.113936005439916, "grad_norm": 0.19753815233707428, "learning_rate": 3.778474557062446e-06, "loss": 0.0022, "step": 214784 }, { "epoch": 8.12360696611386, "grad_norm": 0.0022627103608101606, "learning_rate": 3.7591326357145558e-06, "loss": 0.0027, "step": 215040 }, { "epoch": 8.133277926787805, "grad_norm": 0.027415508404374123, "learning_rate": 3.7397907143666655e-06, "loss": 0.0013, "step": 215296 }, { "epoch": 8.14294888746175, "grad_norm": 0.13008584082126617, "learning_rate": 3.7204487930187756e-06, "loss": 0.0023, "step": 215552 }, { "epoch": 8.152619848135696, "grad_norm": 0.03469686210155487, "learning_rate": 3.7011068716708853e-06, "loss": 0.0017, "step": 215808 }, { "epoch": 8.16229080880964, "grad_norm": 0.27707692980766296, "learning_rate": 3.681764950322995e-06, "loss": 0.0016, "step": 216064 }, { "epoch": 8.171961769483586, "grad_norm": 0.001276107388548553, "learning_rate": 3.662423028975105e-06, "loss": 0.0019, "step": 216320 }, { "epoch": 8.181632730157531, "grad_norm": 0.1010005995631218, "learning_rate": 3.6431566620074804e-06, "loss": 0.0017, "step": 216576 }, { "epoch": 8.191303690831475, "grad_norm": 0.0014761561760678887, "learning_rate": 3.62381474065959e-06, "loss": 0.0012, "step": 216832 }, { "epoch": 8.20097465150542, "grad_norm": 0.01065876055508852, "learning_rate": 3.6044728193117002e-06, "loss": 0.0018, "step": 217088 }, { "epoch": 8.210645612179366, "grad_norm": 0.013767397962510586, "learning_rate": 3.58513089796381e-06, "loss": 0.0019, "step": 217344 }, { "epoch": 8.220316572853312, "grad_norm": 0.06974133104085922, "learning_rate": 3.5657889766159196e-06, "loss": 0.0016, "step": 217600 }, { "epoch": 8.229987533527256, "grad_norm": 0.0005238667945377529, "learning_rate": 3.5464470552680298e-06, "loss": 0.002, "step": 217856 }, { "epoch": 8.239658494201201, "grad_norm": 0.0016113127348944545, "learning_rate": 3.5271051339201395e-06, "loss": 0.0021, "step": 218112 }, { "epoch": 8.249329454875147, "grad_norm": 0.006277918349951506, "learning_rate": 3.5077632125722496e-06, "loss": 0.002, "step": 218368 }, { "epoch": 8.259000415549092, "grad_norm": 6.543371200561523, "learning_rate": 3.4884212912243593e-06, "loss": 0.0032, "step": 218624 }, { "epoch": 8.268671376223036, "grad_norm": 0.002728199353441596, "learning_rate": 3.469154924256734e-06, "loss": 0.0017, "step": 218880 }, { "epoch": 8.278342336896982, "grad_norm": 0.0380263552069664, "learning_rate": 3.449813002908844e-06, "loss": 0.0015, "step": 219136 }, { "epoch": 8.288013297570927, "grad_norm": 0.09521731734275818, "learning_rate": 3.430471081560954e-06, "loss": 0.0014, "step": 219392 }, { "epoch": 8.297684258244871, "grad_norm": 0.05859646946191788, "learning_rate": 3.4111291602130637e-06, "loss": 0.0016, "step": 219648 }, { "epoch": 8.307355218918817, "grad_norm": 0.057360127568244934, "learning_rate": 3.3917872388651734e-06, "loss": 0.0015, "step": 219904 }, { "epoch": 8.317026179592762, "grad_norm": 0.005022614262998104, "learning_rate": 3.3724453175172835e-06, "loss": 0.0018, "step": 220160 }, { "epoch": 8.326697140266708, "grad_norm": 0.009211408905684948, "learning_rate": 3.353103396169393e-06, "loss": 0.0019, "step": 220416 }, { "epoch": 8.336368100940652, "grad_norm": 0.8216173052787781, "learning_rate": 3.333761474821503e-06, "loss": 0.0015, "step": 220672 }, { "epoch": 8.346039061614597, "grad_norm": 0.036033984273672104, "learning_rate": 3.314419553473613e-06, "loss": 0.0019, "step": 220928 }, { "epoch": 8.355710022288543, "grad_norm": 0.28528496623039246, "learning_rate": 3.295153186505988e-06, "loss": 0.0016, "step": 221184 }, { "epoch": 8.365380982962487, "grad_norm": 0.21900594234466553, "learning_rate": 3.2758112651580976e-06, "loss": 0.0011, "step": 221440 }, { "epoch": 8.375051943636432, "grad_norm": 0.1995537430047989, "learning_rate": 3.2564693438102073e-06, "loss": 0.0011, "step": 221696 }, { "epoch": 8.384722904310378, "grad_norm": 0.05183367431163788, "learning_rate": 3.2371274224623174e-06, "loss": 0.0016, "step": 221952 }, { "epoch": 8.394393864984323, "grad_norm": 0.8787815570831299, "learning_rate": 3.217785501114427e-06, "loss": 0.0016, "step": 222208 }, { "epoch": 8.404064825658267, "grad_norm": 0.013117530383169651, "learning_rate": 3.198443579766537e-06, "loss": 0.0017, "step": 222464 }, { "epoch": 8.413735786332213, "grad_norm": 0.005170230288058519, "learning_rate": 3.179101658418647e-06, "loss": 0.0018, "step": 222720 }, { "epoch": 8.423406747006158, "grad_norm": 0.07186521589756012, "learning_rate": 3.1597597370707567e-06, "loss": 0.0024, "step": 222976 }, { "epoch": 8.433077707680102, "grad_norm": 0.4515911042690277, "learning_rate": 3.1404178157228664e-06, "loss": 0.0023, "step": 223232 }, { "epoch": 8.442748668354048, "grad_norm": 0.05934571474790573, "learning_rate": 3.121151448755242e-06, "loss": 0.0024, "step": 223488 }, { "epoch": 8.452419629027993, "grad_norm": 0.5439963936805725, "learning_rate": 3.1018095274073517e-06, "loss": 0.0015, "step": 223744 }, { "epoch": 8.462090589701939, "grad_norm": 0.002186891855672002, "learning_rate": 3.0824676060594614e-06, "loss": 0.0012, "step": 224000 }, { "epoch": 8.471761550375883, "grad_norm": 0.005363143049180508, "learning_rate": 3.0631256847115716e-06, "loss": 0.0019, "step": 224256 }, { "epoch": 8.481432511049828, "grad_norm": 0.8389096856117249, "learning_rate": 3.0437837633636813e-06, "loss": 0.0014, "step": 224512 }, { "epoch": 8.491103471723774, "grad_norm": 0.009227742440998554, "learning_rate": 3.024441842015791e-06, "loss": 0.0015, "step": 224768 }, { "epoch": 8.500774432397717, "grad_norm": 0.021440809592604637, "learning_rate": 3.005099920667901e-06, "loss": 0.0012, "step": 225024 }, { "epoch": 8.510445393071663, "grad_norm": 0.053202465176582336, "learning_rate": 2.985757999320011e-06, "loss": 0.0021, "step": 225280 }, { "epoch": 8.520116353745609, "grad_norm": 0.09795872122049332, "learning_rate": 2.9664160779721205e-06, "loss": 0.0011, "step": 225536 }, { "epoch": 8.529787314419554, "grad_norm": 0.07053809612989426, "learning_rate": 2.9470741566242306e-06, "loss": 0.002, "step": 225792 }, { "epoch": 8.539458275093498, "grad_norm": 0.015584302134811878, "learning_rate": 2.9277322352763403e-06, "loss": 0.0017, "step": 226048 }, { "epoch": 8.549129235767444, "grad_norm": 0.014636941254138947, "learning_rate": 2.9083903139284505e-06, "loss": 0.002, "step": 226304 }, { "epoch": 8.55880019644139, "grad_norm": 0.002259760396555066, "learning_rate": 2.88904839258056e-06, "loss": 0.0022, "step": 226560 }, { "epoch": 8.568471157115333, "grad_norm": 0.1187112033367157, "learning_rate": 2.86970647123267e-06, "loss": 0.0019, "step": 226816 }, { "epoch": 8.578142117789278, "grad_norm": 1.316362738609314, "learning_rate": 2.85036454988478e-06, "loss": 0.0018, "step": 227072 }, { "epoch": 8.587813078463224, "grad_norm": 0.02268380858004093, "learning_rate": 2.8310226285368897e-06, "loss": 0.0015, "step": 227328 }, { "epoch": 8.59748403913717, "grad_norm": 0.11864591389894485, "learning_rate": 2.8117562615692645e-06, "loss": 0.0016, "step": 227584 }, { "epoch": 8.607154999811113, "grad_norm": 0.1629875749349594, "learning_rate": 2.7924143402213743e-06, "loss": 0.0013, "step": 227840 }, { "epoch": 8.616825960485059, "grad_norm": 0.0012598687317222357, "learning_rate": 2.7730724188734844e-06, "loss": 0.0015, "step": 228096 }, { "epoch": 8.626496921159005, "grad_norm": 0.012492740526795387, "learning_rate": 2.753730497525594e-06, "loss": 0.0017, "step": 228352 }, { "epoch": 8.63616788183295, "grad_norm": 0.14352132380008698, "learning_rate": 2.734388576177704e-06, "loss": 0.0013, "step": 228608 }, { "epoch": 8.645838842506894, "grad_norm": 0.022044667974114418, "learning_rate": 2.715046654829814e-06, "loss": 0.0015, "step": 228864 }, { "epoch": 8.65550980318084, "grad_norm": 0.28277599811553955, "learning_rate": 2.6957047334819236e-06, "loss": 0.0024, "step": 229120 }, { "epoch": 8.665180763854785, "grad_norm": 0.005805708467960358, "learning_rate": 2.6763628121340333e-06, "loss": 0.0018, "step": 229376 }, { "epoch": 8.674851724528729, "grad_norm": 0.042201265692710876, "learning_rate": 2.6570208907861435e-06, "loss": 0.0013, "step": 229632 }, { "epoch": 8.684522685202674, "grad_norm": 0.5332639813423157, "learning_rate": 2.637678969438253e-06, "loss": 0.001, "step": 229888 }, { "epoch": 8.69419364587662, "grad_norm": 0.001226294320076704, "learning_rate": 2.618337048090363e-06, "loss": 0.0021, "step": 230144 }, { "epoch": 8.703864606550566, "grad_norm": 0.008391711860895157, "learning_rate": 2.598995126742473e-06, "loss": 0.0018, "step": 230400 }, { "epoch": 8.71353556722451, "grad_norm": 0.005565817467868328, "learning_rate": 2.5797287597748482e-06, "loss": 0.0014, "step": 230656 }, { "epoch": 8.723206527898455, "grad_norm": 0.004753118846565485, "learning_rate": 2.560386838426958e-06, "loss": 0.0012, "step": 230912 }, { "epoch": 8.7328774885724, "grad_norm": 0.0013271772768348455, "learning_rate": 2.541044917079068e-06, "loss": 0.0021, "step": 231168 }, { "epoch": 8.742548449246344, "grad_norm": 0.06540732830762863, "learning_rate": 2.5217029957311778e-06, "loss": 0.0017, "step": 231424 }, { "epoch": 8.75221940992029, "grad_norm": 0.16908077895641327, "learning_rate": 2.5023610743832875e-06, "loss": 0.002, "step": 231680 }, { "epoch": 8.761890370594235, "grad_norm": 0.29503974318504333, "learning_rate": 2.4830191530353976e-06, "loss": 0.0016, "step": 231936 }, { "epoch": 8.771561331268181, "grad_norm": 9.24250316619873, "learning_rate": 2.4636772316875073e-06, "loss": 0.0019, "step": 232192 }, { "epoch": 8.781232291942125, "grad_norm": 0.09708067774772644, "learning_rate": 2.444335310339617e-06, "loss": 0.0011, "step": 232448 }, { "epoch": 8.79090325261607, "grad_norm": 0.001615343731828034, "learning_rate": 2.425068943371992e-06, "loss": 0.0017, "step": 232704 }, { "epoch": 8.800574213290016, "grad_norm": 0.21001236140727997, "learning_rate": 2.405727022024102e-06, "loss": 0.0021, "step": 232960 }, { "epoch": 8.81024517396396, "grad_norm": 0.014585831202566624, "learning_rate": 2.3863851006762117e-06, "loss": 0.001, "step": 233216 }, { "epoch": 8.819916134637905, "grad_norm": 0.6824801564216614, "learning_rate": 2.367043179328322e-06, "loss": 0.0016, "step": 233472 }, { "epoch": 8.829587095311851, "grad_norm": 0.014681616798043251, "learning_rate": 2.3477012579804315e-06, "loss": 0.002, "step": 233728 }, { "epoch": 8.839258055985797, "grad_norm": 0.010957110673189163, "learning_rate": 2.3283593366325417e-06, "loss": 0.0013, "step": 233984 }, { "epoch": 8.84892901665974, "grad_norm": 1.2413954734802246, "learning_rate": 2.3090174152846514e-06, "loss": 0.0011, "step": 234240 }, { "epoch": 8.858599977333686, "grad_norm": 0.1283838450908661, "learning_rate": 2.289751048317026e-06, "loss": 0.0011, "step": 234496 }, { "epoch": 8.868270938007631, "grad_norm": 0.030070781707763672, "learning_rate": 2.2704846813494014e-06, "loss": 0.0013, "step": 234752 }, { "epoch": 8.877941898681575, "grad_norm": 0.10878092795610428, "learning_rate": 2.251142760001511e-06, "loss": 0.0012, "step": 235008 }, { "epoch": 8.88761285935552, "grad_norm": 0.0010157637298107147, "learning_rate": 2.2318008386536213e-06, "loss": 0.001, "step": 235264 }, { "epoch": 8.897283820029466, "grad_norm": 0.0011938668321818113, "learning_rate": 2.212458917305731e-06, "loss": 0.0012, "step": 235520 }, { "epoch": 8.906954780703412, "grad_norm": 0.0009772476041689515, "learning_rate": 2.1931169959578407e-06, "loss": 0.0012, "step": 235776 }, { "epoch": 8.916625741377356, "grad_norm": 0.02653772011399269, "learning_rate": 2.173775074609951e-06, "loss": 0.0011, "step": 236032 }, { "epoch": 8.926296702051301, "grad_norm": 0.002025567227974534, "learning_rate": 2.1544331532620605e-06, "loss": 0.0011, "step": 236288 }, { "epoch": 8.935967662725247, "grad_norm": 0.07364089787006378, "learning_rate": 2.1350912319141702e-06, "loss": 0.0017, "step": 236544 }, { "epoch": 8.94563862339919, "grad_norm": 0.000813729246146977, "learning_rate": 2.1157493105662803e-06, "loss": 0.0012, "step": 236800 }, { "epoch": 8.955309584073136, "grad_norm": 0.3964155912399292, "learning_rate": 2.09640738921839e-06, "loss": 0.0013, "step": 237056 }, { "epoch": 8.964980544747082, "grad_norm": 0.02675843983888626, "learning_rate": 2.0770654678704998e-06, "loss": 0.0013, "step": 237312 }, { "epoch": 8.974651505421027, "grad_norm": 0.45542263984680176, "learning_rate": 2.05772354652261e-06, "loss": 0.0009, "step": 237568 }, { "epoch": 8.984322466094971, "grad_norm": 0.004206574056297541, "learning_rate": 2.0383816251747196e-06, "loss": 0.0011, "step": 237824 }, { "epoch": 8.993993426768917, "grad_norm": 0.029356837272644043, "learning_rate": 2.0190397038268293e-06, "loss": 0.0011, "step": 238080 }, { "epoch": 9.0, "eval_f1_B-DISEASE": 0.820265182517597, "eval_f1_B-MEDICATION": 0.9411343069617418, "eval_f1_B-PROCEDURE": 0.8432354734732991, "eval_f1_B-SYMPTOM": 0.8137595552466991, "eval_f1_I-DISEASE": 0.853021119122057, "eval_f1_I-MEDICATION": 0.95037530170013, "eval_f1_I-PROCEDURE": 0.8561874578893519, "eval_f1_I-SYMPTOM": 0.8206522616146783, "eval_f1_O": 0.9278188308044935, "eval_f1_macro": 0.8696054988144497, "eval_f1_micro": 0.8938386961399782, "eval_loss": 0.507999837398529, "eval_precision_B-DISEASE": 0.8472398343055203, "eval_precision_B-MEDICATION": 0.9476723204619271, "eval_precision_B-PROCEDURE": 0.8588731144631766, "eval_precision_B-SYMPTOM": 0.826393789696542, "eval_precision_I-DISEASE": 0.8680188544493643, "eval_precision_I-MEDICATION": 0.9531815279846776, "eval_precision_I-PROCEDURE": 0.8730946382675522, "eval_precision_I-SYMPTOM": 0.8215935894536173, "eval_precision_O": 0.9236782838636837, "eval_precision_macro": 0.8799717725495623, "eval_precision_micro": 0.895900159381018, "eval_rauc_macro": 0.9196513344131847, "eval_rauc_micro": 0.9392897520293723, "eval_recall_B-DISEASE": 0.7949551836281431, "eval_recall_B-MEDICATION": 0.9346858871685353, "eval_recall_B-PROCEDURE": 0.8281570841889117, "eval_recall_B-SYMPTOM": 0.8015058179329226, "eval_recall_I-DISEASE": 0.8385328466237659, "eval_recall_I-MEDICATION": 0.9475855503252764, "eval_recall_I-PROCEDURE": 0.8399226419251389, "eval_recall_I-SYMPTOM": 0.8197130883301096, "eval_recall_O": 0.9319966663269038, "eval_recall_macro": 0.8596727518277453, "eval_recall_micro": 0.8917866979597766, "eval_roc_auc_B-DISEASE": 0.8961441020336804, "eval_roc_auc_B-MEDICATION": 0.9672374649513288, "eval_roc_auc_B-PROCEDURE": 0.9129064100226968, "eval_roc_auc_B-SYMPTOM": 0.8989321500948878, "eval_roc_auc_I-DISEASE": 0.9109411921441964, "eval_roc_auc_I-MEDICATION": 0.973469577978567, "eval_roc_auc_I-PROCEDURE": 0.9140138334490733, "eval_roc_auc_I-SYMPTOM": 0.8959855777737502, "eval_roc_auc_O": 0.9072317012704816, "eval_runtime": 61.1454, "eval_samples_per_second": 177.822, "eval_steps_per_second": 22.242, "step": 238239 }, { "epoch": 9.003664387442862, "grad_norm": 0.0012333148624747992, "learning_rate": 1.9996977824789394e-06, "loss": 0.0009, "step": 238336 }, { "epoch": 9.013335348116808, "grad_norm": 0.05198514088988304, "learning_rate": 1.980355861131049e-06, "loss": 0.0013, "step": 238592 }, { "epoch": 9.023006308790752, "grad_norm": 0.002556259511038661, "learning_rate": 1.9610894941634244e-06, "loss": 0.0007, "step": 238848 }, { "epoch": 9.032677269464697, "grad_norm": 0.017605243250727654, "learning_rate": 1.941747572815534e-06, "loss": 0.0003, "step": 239104 }, { "epoch": 9.042348230138643, "grad_norm": 0.2546218931674957, "learning_rate": 1.9224056514676438e-06, "loss": 0.0011, "step": 239360 }, { "epoch": 9.052019190812587, "grad_norm": 0.005463745910674334, "learning_rate": 1.903063730119754e-06, "loss": 0.0007, "step": 239616 }, { "epoch": 9.061690151486532, "grad_norm": 0.013387962244451046, "learning_rate": 1.8837218087718636e-06, "loss": 0.0009, "step": 239872 }, { "epoch": 9.071361112160478, "grad_norm": 0.0036850119940936565, "learning_rate": 1.8643798874239735e-06, "loss": 0.0008, "step": 240128 }, { "epoch": 9.081032072834423, "grad_norm": 0.002097562188282609, "learning_rate": 1.8450379660760835e-06, "loss": 0.0006, "step": 240384 }, { "epoch": 9.090703033508367, "grad_norm": 0.009374704211950302, "learning_rate": 1.8256960447281932e-06, "loss": 0.0007, "step": 240640 }, { "epoch": 9.100373994182313, "grad_norm": 0.009544138796627522, "learning_rate": 1.806354123380303e-06, "loss": 0.001, "step": 240896 }, { "epoch": 9.110044954856258, "grad_norm": 0.014574944972991943, "learning_rate": 1.787012202032413e-06, "loss": 0.001, "step": 241152 }, { "epoch": 9.119715915530202, "grad_norm": 0.0014770117122679949, "learning_rate": 1.7676702806845227e-06, "loss": 0.001, "step": 241408 }, { "epoch": 9.129386876204148, "grad_norm": 0.00023075766512192786, "learning_rate": 1.7483283593366326e-06, "loss": 0.0006, "step": 241664 }, { "epoch": 9.139057836878093, "grad_norm": 0.036480896174907684, "learning_rate": 1.7289864379887425e-06, "loss": 0.0007, "step": 241920 }, { "epoch": 9.148728797552039, "grad_norm": 0.005274293944239616, "learning_rate": 1.7096445166408524e-06, "loss": 0.0008, "step": 242176 }, { "epoch": 9.158399758225983, "grad_norm": 0.0636928603053093, "learning_rate": 1.6903025952929622e-06, "loss": 0.0005, "step": 242432 }, { "epoch": 9.168070718899928, "grad_norm": 0.007083303295075893, "learning_rate": 1.670960673945072e-06, "loss": 0.0012, "step": 242688 }, { "epoch": 9.177741679573874, "grad_norm": 0.0029853142332285643, "learning_rate": 1.651618752597182e-06, "loss": 0.0013, "step": 242944 }, { "epoch": 9.187412640247818, "grad_norm": 0.004745794460177422, "learning_rate": 1.6322768312492917e-06, "loss": 0.0008, "step": 243200 }, { "epoch": 9.197083600921763, "grad_norm": 0.029157089069485664, "learning_rate": 1.6129349099014016e-06, "loss": 0.0009, "step": 243456 }, { "epoch": 9.206754561595709, "grad_norm": 0.09862171113491058, "learning_rate": 1.5935929885535115e-06, "loss": 0.001, "step": 243712 }, { "epoch": 9.216425522269654, "grad_norm": 0.02549828216433525, "learning_rate": 1.5742510672056212e-06, "loss": 0.0013, "step": 243968 }, { "epoch": 9.226096482943598, "grad_norm": 0.0003341349947731942, "learning_rate": 1.5549091458577311e-06, "loss": 0.0011, "step": 244224 }, { "epoch": 9.235767443617544, "grad_norm": 0.00141440168954432, "learning_rate": 1.535567224509841e-06, "loss": 0.0007, "step": 244480 }, { "epoch": 9.24543840429149, "grad_norm": 0.0013356832787394524, "learning_rate": 1.516225303161951e-06, "loss": 0.0014, "step": 244736 }, { "epoch": 9.255109364965433, "grad_norm": 0.06080584600567818, "learning_rate": 1.4968833818140607e-06, "loss": 0.0006, "step": 244992 }, { "epoch": 9.264780325639379, "grad_norm": 0.11237218230962753, "learning_rate": 1.4775414604661706e-06, "loss": 0.0012, "step": 245248 }, { "epoch": 9.274451286313324, "grad_norm": 0.23928683996200562, "learning_rate": 1.4581995391182805e-06, "loss": 0.0005, "step": 245504 }, { "epoch": 9.28412224698727, "grad_norm": 0.001771116629242897, "learning_rate": 1.4388576177703902e-06, "loss": 0.0011, "step": 245760 }, { "epoch": 9.293793207661214, "grad_norm": 0.0017612532246857882, "learning_rate": 1.4195156964225001e-06, "loss": 0.0007, "step": 246016 }, { "epoch": 9.303464168335159, "grad_norm": 0.00843687541782856, "learning_rate": 1.4002493294548752e-06, "loss": 0.0018, "step": 246272 }, { "epoch": 9.313135129009105, "grad_norm": 0.07218307256698608, "learning_rate": 1.380907408106985e-06, "loss": 0.0007, "step": 246528 }, { "epoch": 9.322806089683048, "grad_norm": 0.00684273662045598, "learning_rate": 1.361565486759095e-06, "loss": 0.0005, "step": 246784 }, { "epoch": 9.332477050356994, "grad_norm": 0.0004243789007887244, "learning_rate": 1.34229911979147e-06, "loss": 0.0005, "step": 247040 }, { "epoch": 9.34214801103094, "grad_norm": 0.000982985831797123, "learning_rate": 1.32295719844358e-06, "loss": 0.0009, "step": 247296 }, { "epoch": 9.351818971704885, "grad_norm": 0.008006760850548744, "learning_rate": 1.3036152770956897e-06, "loss": 0.0008, "step": 247552 }, { "epoch": 9.361489932378829, "grad_norm": 0.0012839402770623565, "learning_rate": 1.2842733557477996e-06, "loss": 0.0012, "step": 247808 }, { "epoch": 9.371160893052775, "grad_norm": 0.0076642511412501335, "learning_rate": 1.2649314343999095e-06, "loss": 0.0007, "step": 248064 }, { "epoch": 9.38083185372672, "grad_norm": 0.0976879671216011, "learning_rate": 1.2455895130520192e-06, "loss": 0.0006, "step": 248320 }, { "epoch": 9.390502814400666, "grad_norm": 2.341963291168213, "learning_rate": 1.2262475917041291e-06, "loss": 0.0009, "step": 248576 }, { "epoch": 9.40017377507461, "grad_norm": 0.002288688672706485, "learning_rate": 1.206905670356239e-06, "loss": 0.0007, "step": 248832 }, { "epoch": 9.409844735748555, "grad_norm": 0.0005827232380397618, "learning_rate": 1.187563749008349e-06, "loss": 0.0009, "step": 249088 }, { "epoch": 9.4195156964225, "grad_norm": 0.00030542805325239897, "learning_rate": 1.1682218276604587e-06, "loss": 0.0007, "step": 249344 }, { "epoch": 9.429186657096444, "grad_norm": 0.00033465458545833826, "learning_rate": 1.1488799063125686e-06, "loss": 0.0007, "step": 249600 }, { "epoch": 9.43885761777039, "grad_norm": 0.0005558193661272526, "learning_rate": 1.1295379849646785e-06, "loss": 0.001, "step": 249856 }, { "epoch": 9.448528578444336, "grad_norm": 0.008819201961159706, "learning_rate": 1.1101960636167882e-06, "loss": 0.0008, "step": 250112 }, { "epoch": 9.458199539118281, "grad_norm": 0.011124982498586178, "learning_rate": 1.0908541422688981e-06, "loss": 0.001, "step": 250368 }, { "epoch": 9.467870499792225, "grad_norm": 0.07561460882425308, "learning_rate": 1.071512220921008e-06, "loss": 0.001, "step": 250624 }, { "epoch": 9.47754146046617, "grad_norm": 0.00775932939723134, "learning_rate": 1.0521702995731177e-06, "loss": 0.0008, "step": 250880 }, { "epoch": 9.487212421140116, "grad_norm": 0.0026791319251060486, "learning_rate": 1.032903932605493e-06, "loss": 0.0009, "step": 251136 }, { "epoch": 9.49688338181406, "grad_norm": 0.0014814439928159118, "learning_rate": 1.0135620112576027e-06, "loss": 0.0009, "step": 251392 }, { "epoch": 9.506554342488005, "grad_norm": 0.17079958319664001, "learning_rate": 9.942200899097126e-07, "loss": 0.0008, "step": 251648 }, { "epoch": 9.516225303161951, "grad_norm": 0.006455567199736834, "learning_rate": 9.748781685618225e-07, "loss": 0.0008, "step": 251904 }, { "epoch": 9.525896263835897, "grad_norm": 0.0035214037634432316, "learning_rate": 9.555362472139324e-07, "loss": 0.0003, "step": 252160 }, { "epoch": 9.53556722450984, "grad_norm": 0.0005882234545424581, "learning_rate": 9.361943258660422e-07, "loss": 0.0004, "step": 252416 }, { "epoch": 9.545238185183786, "grad_norm": 0.00487788300961256, "learning_rate": 9.168524045181521e-07, "loss": 0.0011, "step": 252672 }, { "epoch": 9.554909145857732, "grad_norm": 0.0005454017664305866, "learning_rate": 8.975104831702619e-07, "loss": 0.0009, "step": 252928 }, { "epoch": 9.564580106531675, "grad_norm": 0.0002963479782920331, "learning_rate": 8.782441162026368e-07, "loss": 0.0008, "step": 253184 }, { "epoch": 9.574251067205621, "grad_norm": 0.014803556725382805, "learning_rate": 8.589021948547467e-07, "loss": 0.0007, "step": 253440 }, { "epoch": 9.583922027879566, "grad_norm": 0.0010204812278971076, "learning_rate": 8.395602735068565e-07, "loss": 0.0006, "step": 253696 }, { "epoch": 9.593592988553512, "grad_norm": 0.000603766180574894, "learning_rate": 8.202183521589665e-07, "loss": 0.0005, "step": 253952 }, { "epoch": 9.603263949227456, "grad_norm": 0.0006934937555342913, "learning_rate": 8.008764308110763e-07, "loss": 0.0004, "step": 254208 }, { "epoch": 9.612934909901401, "grad_norm": 0.002163499826565385, "learning_rate": 7.815345094631861e-07, "loss": 0.0012, "step": 254464 }, { "epoch": 9.622605870575347, "grad_norm": 0.009007874876260757, "learning_rate": 7.62192588115296e-07, "loss": 0.0009, "step": 254720 }, { "epoch": 9.632276831249293, "grad_norm": 0.001189779955893755, "learning_rate": 7.42850666767406e-07, "loss": 0.0007, "step": 254976 }, { "epoch": 9.641947791923236, "grad_norm": 0.006172012072056532, "learning_rate": 7.235087454195158e-07, "loss": 0.0008, "step": 255232 }, { "epoch": 9.651618752597182, "grad_norm": 0.0028820731677114964, "learning_rate": 7.041668240716256e-07, "loss": 0.0009, "step": 255488 }, { "epoch": 9.661289713271128, "grad_norm": 0.002549013588577509, "learning_rate": 6.848249027237356e-07, "loss": 0.0005, "step": 255744 }, { "epoch": 9.670960673945071, "grad_norm": 0.0011592130176723003, "learning_rate": 6.654829813758454e-07, "loss": 0.0005, "step": 256000 }, { "epoch": 9.680631634619017, "grad_norm": 0.009473592974245548, "learning_rate": 6.461410600279553e-07, "loss": 0.0006, "step": 256256 }, { "epoch": 9.690302595292962, "grad_norm": 0.013225371949374676, "learning_rate": 6.267991386800651e-07, "loss": 0.0006, "step": 256512 }, { "epoch": 9.699973555966906, "grad_norm": 0.0005315671442076564, "learning_rate": 6.0753277171244e-07, "loss": 0.0009, "step": 256768 }, { "epoch": 9.709644516640852, "grad_norm": 0.07691678404808044, "learning_rate": 5.881908503645499e-07, "loss": 0.001, "step": 257024 }, { "epoch": 9.719315477314797, "grad_norm": 0.00017236363783013076, "learning_rate": 5.688489290166599e-07, "loss": 0.0007, "step": 257280 }, { "epoch": 9.728986437988743, "grad_norm": 0.00099793984554708, "learning_rate": 5.495070076687697e-07, "loss": 0.0016, "step": 257536 }, { "epoch": 9.738657398662687, "grad_norm": 0.0005514703807421029, "learning_rate": 5.301650863208795e-07, "loss": 0.0004, "step": 257792 }, { "epoch": 9.748328359336632, "grad_norm": 0.0029203654266893864, "learning_rate": 5.108231649729894e-07, "loss": 0.0007, "step": 258048 }, { "epoch": 9.757999320010578, "grad_norm": 0.0004254644736647606, "learning_rate": 4.914812436250992e-07, "loss": 0.0016, "step": 258304 }, { "epoch": 9.767670280684523, "grad_norm": 0.0031043547205626965, "learning_rate": 4.721393222772091e-07, "loss": 0.0005, "step": 258560 }, { "epoch": 9.777341241358467, "grad_norm": 0.002943431492894888, "learning_rate": 4.5279740092931894e-07, "loss": 0.0008, "step": 258816 }, { "epoch": 9.787012202032413, "grad_norm": 0.00015551786054857075, "learning_rate": 4.334554795814288e-07, "loss": 0.0011, "step": 259072 }, { "epoch": 9.796683162706358, "grad_norm": 0.0021992865949869156, "learning_rate": 4.1411355823353867e-07, "loss": 0.0011, "step": 259328 }, { "epoch": 9.806354123380302, "grad_norm": 0.3576786518096924, "learning_rate": 3.947716368856485e-07, "loss": 0.0011, "step": 259584 }, { "epoch": 9.816025084054248, "grad_norm": 0.001682179281488061, "learning_rate": 3.7542971553775834e-07, "loss": 0.0007, "step": 259840 }, { "epoch": 9.825696044728193, "grad_norm": 0.0001894651068141684, "learning_rate": 3.560877941898682e-07, "loss": 0.0007, "step": 260096 }, { "epoch": 9.835367005402139, "grad_norm": 0.11490330845117569, "learning_rate": 3.3674587284197807e-07, "loss": 0.0006, "step": 260352 }, { "epoch": 9.845037966076083, "grad_norm": 0.00011810084106400609, "learning_rate": 3.1740395149408793e-07, "loss": 0.0005, "step": 260608 }, { "epoch": 9.854708926750028, "grad_norm": 0.0006803704309277236, "learning_rate": 2.9806203014619774e-07, "loss": 0.0005, "step": 260864 }, { "epoch": 9.864379887423974, "grad_norm": 0.0036548932548612356, "learning_rate": 2.787201087983076e-07, "loss": 0.0007, "step": 261120 }, { "epoch": 9.874050848097918, "grad_norm": 0.0011051982874050736, "learning_rate": 2.5937818745041747e-07, "loss": 0.0009, "step": 261376 }, { "epoch": 9.883721808771863, "grad_norm": 0.010463064536452293, "learning_rate": 2.4003626610252733e-07, "loss": 0.0006, "step": 261632 }, { "epoch": 9.893392769445809, "grad_norm": 0.0015552444383502007, "learning_rate": 2.2069434475463717e-07, "loss": 0.0004, "step": 261888 }, { "epoch": 9.903063730119754, "grad_norm": 0.00023192820663098246, "learning_rate": 2.0135242340674704e-07, "loss": 0.0005, "step": 262144 }, { "epoch": 9.912734690793698, "grad_norm": 2.180852174758911, "learning_rate": 1.8201050205885687e-07, "loss": 0.0008, "step": 262400 }, { "epoch": 9.922405651467644, "grad_norm": 0.2690439820289612, "learning_rate": 1.6266858071096674e-07, "loss": 0.0005, "step": 262656 }, { "epoch": 9.93207661214159, "grad_norm": 0.0014845479745417833, "learning_rate": 1.4340221374334178e-07, "loss": 0.0007, "step": 262912 }, { "epoch": 9.941747572815533, "grad_norm": 0.0010751072550192475, "learning_rate": 1.2406029239545164e-07, "loss": 0.0017, "step": 263168 }, { "epoch": 9.951418533489479, "grad_norm": 0.005477603990584612, "learning_rate": 1.0479392542782669e-07, "loss": 0.0003, "step": 263424 }, { "epoch": 9.961089494163424, "grad_norm": 0.003662185976281762, "learning_rate": 8.545200407993654e-08, "loss": 0.001, "step": 263680 }, { "epoch": 9.97076045483737, "grad_norm": 0.0006105359643697739, "learning_rate": 6.611008273204639e-08, "loss": 0.001, "step": 263936 }, { "epoch": 9.980431415511314, "grad_norm": 0.0005147479241713881, "learning_rate": 4.676816138415625e-08, "loss": 0.0006, "step": 264192 }, { "epoch": 9.99010237618526, "grad_norm": 0.002465909579768777, "learning_rate": 2.7426240036266103e-08, "loss": 0.0007, "step": 264448 }, { "epoch": 9.999773336859205, "grad_norm": 0.0049901618622243404, "learning_rate": 8.084318688375959e-09, "loss": 0.0008, "step": 264704 }, { "epoch": 10.0, "eval_f1_B-DISEASE": 0.8212511199804513, "eval_f1_B-MEDICATION": 0.9441194996850535, "eval_f1_B-PROCEDURE": 0.8438837081733407, "eval_f1_B-SYMPTOM": 0.8142008132624335, "eval_f1_I-DISEASE": 0.8531332420369243, "eval_f1_I-MEDICATION": 0.9504971609841921, "eval_f1_I-PROCEDURE": 0.855959918699867, "eval_f1_I-SYMPTOM": 0.8203061784176257, "eval_f1_O": 0.9279796337095003, "eval_f1_macro": 0.870147919438821, "eval_f1_micro": 0.8940428457113243, "eval_loss": 0.5246506333351135, "eval_precision_B-DISEASE": 0.8439357160793505, "eval_precision_B-MEDICATION": 0.9548598471059337, "eval_precision_B-PROCEDURE": 0.8662552367223749, "eval_precision_B-SYMPTOM": 0.8270493539504342, "eval_precision_I-DISEASE": 0.8624838159699522, "eval_precision_I-MEDICATION": 0.9581855315489627, "eval_precision_I-PROCEDURE": 0.8779157051309547, "eval_precision_I-SYMPTOM": 0.8292179488587081, "eval_precision_O": 0.9223528464068083, "eval_precision_macro": 0.8824728890859421, "eval_precision_micro": 0.8962622493770654, "eval_rauc_macro": 0.918932582829971, "eval_rauc_micro": 0.9393388823070546, "eval_recall_B-DISEASE": 0.7997541048623781, "eval_recall_B-MEDICATION": 0.9336180815091654, "eval_recall_B-PROCEDURE": 0.8226386036960985, "eval_recall_B-SYMPTOM": 0.8017453798767967, "eval_recall_I-DISEASE": 0.8439832411344849, "eval_recall_I-MEDICATION": 0.942931189506532, "eval_recall_I-PROCEDURE": 0.8350755220446033, "eval_recall_I-SYMPTOM": 0.8115839243498818, "eval_recall_O": 0.9336754945048357, "eval_recall_macro": 0.858333949053864, "eval_recall_micro": 0.8918344066604195, "eval_roc_auc_B-DISEASE": 0.898501130076159, "eval_roc_auc_B-MEDICATION": 0.9667188383374489, "eval_roc_auc_B-PROCEDURE": 0.9102252873491329, "eval_roc_auc_B-SYMPTOM": 0.8990597025985936, "eval_roc_auc_I-DISEASE": 0.9132048321619536, "eval_roc_auc_I-MEDICATION": 0.9711798590609849, "eval_roc_auc_I-PROCEDURE": 0.9118804724182935, "eval_roc_auc_I-SYMPTOM": 0.8927663421914375, "eval_roc_auc_O": 0.9068567812757345, "eval_runtime": 60.9219, "eval_samples_per_second": 178.474, "eval_steps_per_second": 22.324, "step": 264710 } ], "logging_steps": 256, "max_steps": 264710, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5034084397459712e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }