{ "best_global_step": 60200, "best_metric": 0.6679060933922908, "best_model_checkpoint": "output/agent_mutation_detection/checkpoint-60200", "epoch": 1.3099136647811849, "eval_steps": 200, "global_step": 61600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001063241611023689, "grad_norm": 2.0224599838256836, "learning_rate": 1.9997916046442397e-05, "loss": 0.1352, "step": 50 }, { "epoch": 0.002126483222047378, "grad_norm": 3.915799856185913, "learning_rate": 1.9995789563220348e-05, "loss": 0.064, "step": 100 }, { "epoch": 0.003189724833071067, "grad_norm": 0.4908771812915802, "learning_rate": 1.99936630799983e-05, "loss": 0.0605, "step": 150 }, { "epoch": 0.004252966444094756, "grad_norm": 0.36717087030410767, "learning_rate": 1.999153659677625e-05, "loss": 0.0583, "step": 200 }, { "epoch": 0.004252966444094756, "eval_f1": 0.2558347806833659, "eval_loss": 0.05625838041305542, "eval_precision": 0.5278147182258462, "eval_recall": 0.1688350431741624, "eval_runtime": 1236.4204, "eval_samples_per_second": 76.066, "eval_steps_per_second": 4.755, "step": 200 }, { "epoch": 0.005316208055118445, "grad_norm": 0.6767643094062805, "learning_rate": 1.9989410113554205e-05, "loss": 0.0595, "step": 250 }, { "epoch": 0.006379449666142134, "grad_norm": 0.649048388004303, "learning_rate": 1.998728363033216e-05, "loss": 0.0491, "step": 300 }, { "epoch": 0.007442691277165823, "grad_norm": 0.7141726016998291, "learning_rate": 1.998515714711011e-05, "loss": 0.058, "step": 350 }, { "epoch": 0.008505932888189512, "grad_norm": 0.31631940603256226, "learning_rate": 1.9983030663888063e-05, "loss": 0.0458, "step": 400 }, { "epoch": 0.008505932888189512, "eval_f1": 0.4995703308633246, "eval_loss": 0.044822197407484055, "eval_precision": 0.6655448275861457, "eval_recall": 0.39985415251004636, "eval_runtime": 1229.472, "eval_samples_per_second": 76.496, "eval_steps_per_second": 4.782, "step": 400 }, { "epoch": 0.009569174499213202, "grad_norm": 0.6596254110336304, "learning_rate": 1.9980904180666017e-05, "loss": 0.046, "step": 450 }, { "epoch": 0.01063241611023689, "grad_norm": 1.0284258127212524, "learning_rate": 1.997877769744397e-05, "loss": 0.0346, "step": 500 }, { "epoch": 0.011695657721260579, "grad_norm": 5.875338077545166, "learning_rate": 1.997665121422192e-05, "loss": 0.0524, "step": 550 }, { "epoch": 0.012758899332284268, "grad_norm": 0.3553691804409027, "learning_rate": 1.9974524730999875e-05, "loss": 0.0407, "step": 600 }, { "epoch": 0.012758899332284268, "eval_f1": 0.5402613079320953, "eval_loss": 0.04106871783733368, "eval_precision": 0.6679358260925617, "eval_recall": 0.4535635955825638, "eval_runtime": 1229.027, "eval_samples_per_second": 76.524, "eval_steps_per_second": 4.783, "step": 600 }, { "epoch": 0.013822140943307958, "grad_norm": 0.8297381401062012, "learning_rate": 1.997239824777783e-05, "loss": 0.0446, "step": 650 }, { "epoch": 0.014885382554331646, "grad_norm": 0.7689228057861328, "learning_rate": 1.997027176455578e-05, "loss": 0.043, "step": 700 }, { "epoch": 0.015948624165355335, "grad_norm": 0.6189518570899963, "learning_rate": 1.9968145281333732e-05, "loss": 0.047, "step": 750 }, { "epoch": 0.017011865776379025, "grad_norm": 0.6153404712677002, "learning_rate": 1.9966018798111683e-05, "loss": 0.0391, "step": 800 }, { "epoch": 0.017011865776379025, "eval_f1": 0.570204822429824, "eval_loss": 0.03994419425725937, "eval_precision": 0.6857411245933434, "eval_recall": 0.48798691792208826, "eval_runtime": 1230.1207, "eval_samples_per_second": 76.456, "eval_steps_per_second": 4.779, "step": 800 }, { "epoch": 0.018075107387402714, "grad_norm": 0.6424123644828796, "learning_rate": 1.9963892314889636e-05, "loss": 0.0389, "step": 850 }, { "epoch": 0.019138348998426404, "grad_norm": 1.633712887763977, "learning_rate": 1.996176583166759e-05, "loss": 0.0406, "step": 900 }, { "epoch": 0.020201590609450093, "grad_norm": 1.547592043876648, "learning_rate": 1.9959639348445544e-05, "loss": 0.0366, "step": 950 }, { "epoch": 0.02126483222047378, "grad_norm": 0.9138901829719543, "learning_rate": 1.9957512865223494e-05, "loss": 0.0403, "step": 1000 }, { "epoch": 0.02126483222047378, "eval_f1": 0.5812023157159703, "eval_loss": 0.03851224109530449, "eval_precision": 0.6991921702655974, "eval_recall": 0.49728469540522413, "eval_runtime": 1229.2637, "eval_samples_per_second": 76.509, "eval_steps_per_second": 4.783, "step": 1000 }, { "epoch": 0.02232807383149747, "grad_norm": 0.6923019886016846, "learning_rate": 1.9955386382001448e-05, "loss": 0.0494, "step": 1050 }, { "epoch": 0.023391315442521158, "grad_norm": 0.9407742619514465, "learning_rate": 1.99532598987794e-05, "loss": 0.04, "step": 1100 }, { "epoch": 0.024454557053544847, "grad_norm": 0.831279993057251, "learning_rate": 1.9951133415557352e-05, "loss": 0.0391, "step": 1150 }, { "epoch": 0.025517798664568537, "grad_norm": 0.5083991289138794, "learning_rate": 1.9949006932335306e-05, "loss": 0.0339, "step": 1200 }, { "epoch": 0.025517798664568537, "eval_f1": 0.5393037812062605, "eval_loss": 0.03819730877876282, "eval_precision": 0.7519589727373492, "eval_recall": 0.4204109142538066, "eval_runtime": 1230.3344, "eval_samples_per_second": 76.443, "eval_steps_per_second": 4.778, "step": 1200 }, { "epoch": 0.026581040275592226, "grad_norm": 0.7319112420082092, "learning_rate": 1.9946880449113256e-05, "loss": 0.051, "step": 1250 }, { "epoch": 0.027644281886615916, "grad_norm": 0.6393922567367554, "learning_rate": 1.994475396589121e-05, "loss": 0.0408, "step": 1300 }, { "epoch": 0.028707523497639605, "grad_norm": 1.5947378873825073, "learning_rate": 1.9942627482669163e-05, "loss": 0.0347, "step": 1350 }, { "epoch": 0.02977076510866329, "grad_norm": 0.3754733204841614, "learning_rate": 1.9940500999447117e-05, "loss": 0.0348, "step": 1400 }, { "epoch": 0.02977076510866329, "eval_f1": 0.5603909332159157, "eval_loss": 0.03639717400074005, "eval_precision": 0.7647929983469712, "eval_recall": 0.4422051698515315, "eval_runtime": 1228.8698, "eval_samples_per_second": 76.534, "eval_steps_per_second": 4.784, "step": 1400 }, { "epoch": 0.03083400671968698, "grad_norm": 1.0370383262634277, "learning_rate": 1.993837451622507e-05, "loss": 0.0454, "step": 1450 }, { "epoch": 0.03189724833071067, "grad_norm": 0.9564969539642334, "learning_rate": 1.993624803300302e-05, "loss": 0.0327, "step": 1500 }, { "epoch": 0.03296048994173436, "grad_norm": 1.1920331716537476, "learning_rate": 1.9934121549780975e-05, "loss": 0.0409, "step": 1550 }, { "epoch": 0.03402373155275805, "grad_norm": 0.46415019035339355, "learning_rate": 1.9931995066558925e-05, "loss": 0.0386, "step": 1600 }, { "epoch": 0.03402373155275805, "eval_f1": 0.5857913632599847, "eval_loss": 0.037632912397384644, "eval_precision": 0.7203066678489943, "eval_recall": 0.4936108855262667, "eval_runtime": 1228.4704, "eval_samples_per_second": 76.559, "eval_steps_per_second": 4.786, "step": 1600 }, { "epoch": 0.03508697316378174, "grad_norm": 0.7176307439804077, "learning_rate": 1.992986858333688e-05, "loss": 0.0365, "step": 1650 }, { "epoch": 0.03615021477480543, "grad_norm": 0.8363370299339294, "learning_rate": 1.992774210011483e-05, "loss": 0.0408, "step": 1700 }, { "epoch": 0.03721345638582912, "grad_norm": 0.9761326909065247, "learning_rate": 1.9925615616892783e-05, "loss": 0.0421, "step": 1750 }, { "epoch": 0.03827669799685281, "grad_norm": 0.9966303110122681, "learning_rate": 1.9923489133670737e-05, "loss": 0.038, "step": 1800 }, { "epoch": 0.03827669799685281, "eval_f1": 0.5640765006550312, "eval_loss": 0.03619459271430969, "eval_precision": 0.7535372566403679, "eval_recall": 0.45074608725434084, "eval_runtime": 1230.0875, "eval_samples_per_second": 76.458, "eval_steps_per_second": 4.779, "step": 1800 }, { "epoch": 0.039339939607876497, "grad_norm": 1.326159119606018, "learning_rate": 1.992136265044869e-05, "loss": 0.0376, "step": 1850 }, { "epoch": 0.040403181218900186, "grad_norm": 1.29815673828125, "learning_rate": 1.9919236167226644e-05, "loss": 0.0437, "step": 1900 }, { "epoch": 0.04146642282992387, "grad_norm": 0.43758031725883484, "learning_rate": 1.9917109684004595e-05, "loss": 0.0376, "step": 1950 }, { "epoch": 0.04252966444094756, "grad_norm": 1.7897168397903442, "learning_rate": 1.9914983200782548e-05, "loss": 0.0431, "step": 2000 }, { "epoch": 0.04252966444094756, "eval_f1": 0.5966074378316643, "eval_loss": 0.04285940155386925, "eval_precision": 0.6419203136036653, "eval_recall": 0.5572700001657049, "eval_runtime": 1229.0109, "eval_samples_per_second": 76.525, "eval_steps_per_second": 4.784, "step": 2000 }, { "epoch": 0.04359290605197125, "grad_norm": 1.227124571800232, "learning_rate": 1.9912856717560502e-05, "loss": 0.0427, "step": 2050 }, { "epoch": 0.04465614766299494, "grad_norm": 0.6095845103263855, "learning_rate": 1.9910730234338452e-05, "loss": 0.0371, "step": 2100 }, { "epoch": 0.045719389274018626, "grad_norm": 0.3651985824108124, "learning_rate": 1.9908603751116403e-05, "loss": 0.033, "step": 2150 }, { "epoch": 0.046782630885042316, "grad_norm": 1.0842341184616089, "learning_rate": 1.9906477267894356e-05, "loss": 0.0348, "step": 2200 }, { "epoch": 0.046782630885042316, "eval_f1": 0.5868263816280419, "eval_loss": 0.03659769520163536, "eval_precision": 0.7187845392365152, "eval_recall": 0.4958041224013736, "eval_runtime": 246.8597, "eval_samples_per_second": 380.986, "eval_steps_per_second": 23.815, "step": 2200 }, { "epoch": 0.047845872496066005, "grad_norm": 4.103494644165039, "learning_rate": 1.990435078467231e-05, "loss": 0.0448, "step": 2250 }, { "epoch": 0.048909114107089695, "grad_norm": 0.294415682554245, "learning_rate": 1.9902224301450264e-05, "loss": 0.0328, "step": 2300 }, { "epoch": 0.049972355718113384, "grad_norm": 0.9377186894416809, "learning_rate": 1.9900097818228218e-05, "loss": 0.0389, "step": 2350 }, { "epoch": 0.051035597329137074, "grad_norm": 0.3619392216205597, "learning_rate": 1.9897971335006168e-05, "loss": 0.0329, "step": 2400 }, { "epoch": 0.051035597329137074, "eval_f1": 0.5959190653971372, "eval_loss": 0.037844084203243256, "eval_precision": 0.7048700441359006, "eval_recall": 0.5161399030997831, "eval_runtime": 246.6841, "eval_samples_per_second": 381.257, "eval_steps_per_second": 23.832, "step": 2400 }, { "epoch": 0.05209883894016076, "grad_norm": 0.594763457775116, "learning_rate": 1.989584485178412e-05, "loss": 0.0337, "step": 2450 }, { "epoch": 0.05316208055118445, "grad_norm": 1.4897336959838867, "learning_rate": 1.9893718368562075e-05, "loss": 0.0318, "step": 2500 }, { "epoch": 0.05422532216220814, "grad_norm": 0.5889609456062317, "learning_rate": 1.9891591885340026e-05, "loss": 0.0313, "step": 2550 }, { "epoch": 0.05528856377323183, "grad_norm": 0.8788466453552246, "learning_rate": 1.988946540211798e-05, "loss": 0.0391, "step": 2600 }, { "epoch": 0.05528856377323183, "eval_f1": 0.5911404487560454, "eval_loss": 0.03427541255950928, "eval_precision": 0.7393374363397472, "eval_recall": 0.49243416145977353, "eval_runtime": 246.4314, "eval_samples_per_second": 381.648, "eval_steps_per_second": 23.857, "step": 2600 }, { "epoch": 0.05635180538425552, "grad_norm": 0.32628586888313293, "learning_rate": 1.988733891889593e-05, "loss": 0.0354, "step": 2650 }, { "epoch": 0.05741504699527921, "grad_norm": 0.5749639272689819, "learning_rate": 1.9885212435673883e-05, "loss": 0.0348, "step": 2700 }, { "epoch": 0.05847828860630289, "grad_norm": 0.18759532272815704, "learning_rate": 1.9883085952451837e-05, "loss": 0.0344, "step": 2750 }, { "epoch": 0.05954153021732658, "grad_norm": 0.5907154083251953, "learning_rate": 1.988095946922979e-05, "loss": 0.0438, "step": 2800 }, { "epoch": 0.05954153021732658, "eval_f1": 0.6050872150612308, "eval_loss": 0.04124612361192703, "eval_precision": 0.6800656275635297, "eval_recall": 0.5450000276226006, "eval_runtime": 245.9961, "eval_samples_per_second": 382.323, "eval_steps_per_second": 23.899, "step": 2800 }, { "epoch": 0.06060477182835027, "grad_norm": 1.2432407140731812, "learning_rate": 1.987883298600774e-05, "loss": 0.0333, "step": 2850 }, { "epoch": 0.06166801343937396, "grad_norm": 0.24573953449726105, "learning_rate": 1.9876706502785695e-05, "loss": 0.037, "step": 2900 }, { "epoch": 0.06273125505039766, "grad_norm": 0.3732072114944458, "learning_rate": 1.987458001956365e-05, "loss": 0.0355, "step": 2950 }, { "epoch": 0.06379449666142134, "grad_norm": 0.9668097496032715, "learning_rate": 1.98724535363416e-05, "loss": 0.0368, "step": 3000 }, { "epoch": 0.06379449666142134, "eval_f1": 0.570883376319162, "eval_loss": 0.03370242938399315, "eval_precision": 0.7603496452074745, "eval_recall": 0.4570053753639029, "eval_runtime": 245.9627, "eval_samples_per_second": 382.375, "eval_steps_per_second": 23.902, "step": 3000 }, { "epoch": 0.06485773827244504, "grad_norm": 0.20526662468910217, "learning_rate": 1.9870327053119553e-05, "loss": 0.0342, "step": 3050 }, { "epoch": 0.06592097988346872, "grad_norm": 0.8243491053581238, "learning_rate": 1.9868200569897503e-05, "loss": 0.0406, "step": 3100 }, { "epoch": 0.0669842214944924, "grad_norm": 4.765167236328125, "learning_rate": 1.9866074086675457e-05, "loss": 0.0307, "step": 3150 }, { "epoch": 0.0680474631055161, "grad_norm": 0.478315144777298, "learning_rate": 1.986394760345341e-05, "loss": 0.0395, "step": 3200 }, { "epoch": 0.0680474631055161, "eval_f1": 0.6024856243336243, "eval_loss": 0.03363606333732605, "eval_precision": 0.7571761833490174, "eval_recall": 0.5002789885697277, "eval_runtime": 245.8922, "eval_samples_per_second": 382.485, "eval_steps_per_second": 23.909, "step": 3200 }, { "epoch": 0.06911070471653978, "grad_norm": 0.5281405448913574, "learning_rate": 1.9861821120231364e-05, "loss": 0.0371, "step": 3250 }, { "epoch": 0.07017394632756348, "grad_norm": 0.43021509051322937, "learning_rate": 1.9859694637009315e-05, "loss": 0.0357, "step": 3300 }, { "epoch": 0.07123718793858716, "grad_norm": 0.28677526116371155, "learning_rate": 1.9857568153787268e-05, "loss": 0.0334, "step": 3350 }, { "epoch": 0.07230042954961086, "grad_norm": 0.9107722640037537, "learning_rate": 1.9855441670565222e-05, "loss": 0.0317, "step": 3400 }, { "epoch": 0.07230042954961086, "eval_f1": 0.6010914494859544, "eval_loss": 0.03487297147512436, "eval_precision": 0.7197746730678277, "eval_recall": 0.5160073144725725, "eval_runtime": 245.8373, "eval_samples_per_second": 382.57, "eval_steps_per_second": 23.914, "step": 3400 }, { "epoch": 0.07336367116063454, "grad_norm": 3.759284019470215, "learning_rate": 1.9853315187343172e-05, "loss": 0.0355, "step": 3450 }, { "epoch": 0.07442691277165824, "grad_norm": 0.934769868850708, "learning_rate": 1.9851188704121126e-05, "loss": 0.0313, "step": 3500 }, { "epoch": 0.07549015438268192, "grad_norm": 0.6488043069839478, "learning_rate": 1.9849062220899076e-05, "loss": 0.0349, "step": 3550 }, { "epoch": 0.07655339599370561, "grad_norm": 0.14731314778327942, "learning_rate": 1.984693573767703e-05, "loss": 0.0363, "step": 3600 }, { "epoch": 0.07655339599370561, "eval_f1": 0.5901348599379704, "eval_loss": 0.03261386603116989, "eval_precision": 0.7908803133963229, "eval_recall": 0.4706675284927175, "eval_runtime": 246.1243, "eval_samples_per_second": 382.124, "eval_steps_per_second": 23.886, "step": 3600 }, { "epoch": 0.0776166376047293, "grad_norm": 0.5412437319755554, "learning_rate": 1.9844809254454984e-05, "loss": 0.0346, "step": 3650 }, { "epoch": 0.07867987921575299, "grad_norm": 0.22170951962471008, "learning_rate": 1.9842682771232938e-05, "loss": 0.0395, "step": 3700 }, { "epoch": 0.07974312082677668, "grad_norm": 0.7539703845977783, "learning_rate": 1.9840556288010888e-05, "loss": 0.0333, "step": 3750 }, { "epoch": 0.08080636243780037, "grad_norm": 0.4186685085296631, "learning_rate": 1.983842980478884e-05, "loss": 0.0368, "step": 3800 }, { "epoch": 0.08080636243780037, "eval_f1": 0.5902625567691191, "eval_loss": 0.03258601203560829, "eval_precision": 0.7741235542005941, "eval_recall": 0.4769765373374835, "eval_runtime": 246.1861, "eval_samples_per_second": 382.028, "eval_steps_per_second": 23.88, "step": 3800 }, { "epoch": 0.08186960404882405, "grad_norm": 1.1732081174850464, "learning_rate": 1.9836303321566795e-05, "loss": 0.0313, "step": 3850 }, { "epoch": 0.08293284565984774, "grad_norm": 1.2736968994140625, "learning_rate": 1.983417683834475e-05, "loss": 0.0335, "step": 3900 }, { "epoch": 0.08399608727087143, "grad_norm": 0.8656672239303589, "learning_rate": 1.98320503551227e-05, "loss": 0.0338, "step": 3950 }, { "epoch": 0.08505932888189512, "grad_norm": 0.6713738441467285, "learning_rate": 1.9829923871900653e-05, "loss": 0.0341, "step": 4000 }, { "epoch": 0.08505932888189512, "eval_f1": 0.6087715149074607, "eval_loss": 0.03438347950577736, "eval_precision": 0.6981788813127565, "eval_recall": 0.5396633353773782, "eval_runtime": 245.8873, "eval_samples_per_second": 382.492, "eval_steps_per_second": 23.909, "step": 4000 }, { "epoch": 0.08612257049291881, "grad_norm": 0.44973260164260864, "learning_rate": 1.9827797388678603e-05, "loss": 0.0281, "step": 4050 }, { "epoch": 0.0871858121039425, "grad_norm": 0.3566531836986542, "learning_rate": 1.9825670905456557e-05, "loss": 0.0312, "step": 4100 }, { "epoch": 0.08824905371496619, "grad_norm": 0.7474827766418457, "learning_rate": 1.982354442223451e-05, "loss": 0.0352, "step": 4150 }, { "epoch": 0.08931229532598987, "grad_norm": 1.2181198596954346, "learning_rate": 1.9821417939012465e-05, "loss": 0.0349, "step": 4200 }, { "epoch": 0.08931229532598987, "eval_f1": 0.6105893372636159, "eval_loss": 0.03339192643761635, "eval_precision": 0.7360171473109325, "eval_recall": 0.5216865273380887, "eval_runtime": 245.9977, "eval_samples_per_second": 382.321, "eval_steps_per_second": 23.899, "step": 4200 }, { "epoch": 0.09037553693701357, "grad_norm": 0.44467592239379883, "learning_rate": 1.9819291455790415e-05, "loss": 0.0326, "step": 4250 }, { "epoch": 0.09143877854803725, "grad_norm": 1.221679449081421, "learning_rate": 1.981716497256837e-05, "loss": 0.0328, "step": 4300 }, { "epoch": 0.09250202015906095, "grad_norm": 0.9548826217651367, "learning_rate": 1.9815038489346322e-05, "loss": 0.0326, "step": 4350 }, { "epoch": 0.09356526177008463, "grad_norm": 1.1725468635559082, "learning_rate": 1.9812912006124273e-05, "loss": 0.0401, "step": 4400 }, { "epoch": 0.09356526177008463, "eval_f1": 0.5951119350270118, "eval_loss": 0.032608892768621445, "eval_precision": 0.7571788198147581, "eval_recall": 0.4901912038494627, "eval_runtime": 246.162, "eval_samples_per_second": 382.065, "eval_steps_per_second": 23.883, "step": 4400 }, { "epoch": 0.09462850338110833, "grad_norm": 0.9927505850791931, "learning_rate": 1.9810785522902226e-05, "loss": 0.0316, "step": 4450 }, { "epoch": 0.09569174499213201, "grad_norm": 0.7597560286521912, "learning_rate": 1.9808659039680177e-05, "loss": 0.0316, "step": 4500 }, { "epoch": 0.09675498660315571, "grad_norm": 0.4319681227207184, "learning_rate": 1.980653255645813e-05, "loss": 0.0356, "step": 4550 }, { "epoch": 0.09781822821417939, "grad_norm": 0.39425384998321533, "learning_rate": 1.9804406073236084e-05, "loss": 0.0375, "step": 4600 }, { "epoch": 0.09781822821417939, "eval_f1": 0.6037919989345099, "eval_loss": 0.03179575875401497, "eval_precision": 0.7490630574602926, "eval_recall": 0.505715122285358, "eval_runtime": 246.2125, "eval_samples_per_second": 381.987, "eval_steps_per_second": 23.878, "step": 4600 }, { "epoch": 0.09888146982520309, "grad_norm": 0.2628476321697235, "learning_rate": 1.9802279590014038e-05, "loss": 0.0347, "step": 4650 }, { "epoch": 0.09994471143622677, "grad_norm": 0.6132848262786865, "learning_rate": 1.9800153106791988e-05, "loss": 0.0336, "step": 4700 }, { "epoch": 0.10100795304725045, "grad_norm": 0.45675528049468994, "learning_rate": 1.9798026623569942e-05, "loss": 0.0333, "step": 4750 }, { "epoch": 0.10207119465827415, "grad_norm": 0.38246795535087585, "learning_rate": 1.9795900140347896e-05, "loss": 0.0317, "step": 4800 }, { "epoch": 0.10207119465827415, "eval_f1": 0.6140707165730679, "eval_loss": 0.033206865191459656, "eval_precision": 0.7292742964808294, "eval_recall": 0.5302992635806371, "eval_runtime": 246.2122, "eval_samples_per_second": 381.988, "eval_steps_per_second": 23.878, "step": 4800 }, { "epoch": 0.10313443626929783, "grad_norm": 0.433243066072464, "learning_rate": 1.9793773657125846e-05, "loss": 0.0338, "step": 4850 }, { "epoch": 0.10419767788032153, "grad_norm": 0.45333361625671387, "learning_rate": 1.97916471739038e-05, "loss": 0.0365, "step": 4900 }, { "epoch": 0.10526091949134521, "grad_norm": 0.2312437891960144, "learning_rate": 1.978952069068175e-05, "loss": 0.0326, "step": 4950 }, { "epoch": 0.1063241611023689, "grad_norm": 2.422546625137329, "learning_rate": 1.9787394207459704e-05, "loss": 0.0359, "step": 5000 }, { "epoch": 0.1063241611023689, "eval_f1": 0.6153732715067777, "eval_loss": 0.03225712850689888, "eval_precision": 0.7326745516977694, "eval_recall": 0.5304484257862488, "eval_runtime": 246.257, "eval_samples_per_second": 381.918, "eval_steps_per_second": 23.873, "step": 5000 }, { "epoch": 0.10738740271339259, "grad_norm": 0.8786187171936035, "learning_rate": 1.9785267724237657e-05, "loss": 0.0326, "step": 5050 }, { "epoch": 0.10845064432441628, "grad_norm": 0.3744671642780304, "learning_rate": 1.978314124101561e-05, "loss": 0.0285, "step": 5100 }, { "epoch": 0.10951388593543997, "grad_norm": 0.6058568358421326, "learning_rate": 1.978101475779356e-05, "loss": 0.032, "step": 5150 }, { "epoch": 0.11057712754646366, "grad_norm": 0.5622885823249817, "learning_rate": 1.9778888274571515e-05, "loss": 0.0297, "step": 5200 }, { "epoch": 0.11057712754646366, "eval_f1": 0.6083365412310493, "eval_loss": 0.032072387635707855, "eval_precision": 0.7444088179556908, "eval_recall": 0.5143223340017725, "eval_runtime": 246.3443, "eval_samples_per_second": 381.783, "eval_steps_per_second": 23.865, "step": 5200 }, { "epoch": 0.11164036915748735, "grad_norm": 0.7207121253013611, "learning_rate": 1.977676179134947e-05, "loss": 0.04, "step": 5250 }, { "epoch": 0.11270361076851104, "grad_norm": 0.7473412752151489, "learning_rate": 1.9774635308127423e-05, "loss": 0.0299, "step": 5300 }, { "epoch": 0.11376685237953472, "grad_norm": 0.9424649477005005, "learning_rate": 1.9772508824905373e-05, "loss": 0.029, "step": 5350 }, { "epoch": 0.11483009399055842, "grad_norm": 2.3133373260498047, "learning_rate": 1.9770382341683327e-05, "loss": 0.0333, "step": 5400 }, { "epoch": 0.11483009399055842, "eval_f1": 0.6064283237505493, "eval_loss": 0.031251732259988785, "eval_precision": 0.742699283340614, "eval_recall": 0.5124108479594879, "eval_runtime": 247.4391, "eval_samples_per_second": 380.094, "eval_steps_per_second": 23.759, "step": 5400 }, { "epoch": 0.1158933356015821, "grad_norm": 0.49478137493133545, "learning_rate": 1.9768255858461277e-05, "loss": 0.0307, "step": 5450 }, { "epoch": 0.11695657721260579, "grad_norm": 1.2868428230285645, "learning_rate": 1.976612937523923e-05, "loss": 0.0284, "step": 5500 }, { "epoch": 0.11801981882362948, "grad_norm": 0.5433647036552429, "learning_rate": 1.9764002892017185e-05, "loss": 0.0355, "step": 5550 }, { "epoch": 0.11908306043465317, "grad_norm": 0.3346336781978607, "learning_rate": 1.9761876408795135e-05, "loss": 0.032, "step": 5600 }, { "epoch": 0.11908306043465317, "eval_f1": 0.5881460148250874, "eval_loss": 0.03162701800465584, "eval_precision": 0.8077019568488935, "eval_recall": 0.46244150907953313, "eval_runtime": 247.2064, "eval_samples_per_second": 380.451, "eval_steps_per_second": 23.782, "step": 5600 }, { "epoch": 0.12014630204567686, "grad_norm": 0.5857412219047546, "learning_rate": 1.975974992557309e-05, "loss": 0.0332, "step": 5650 }, { "epoch": 0.12120954365670054, "grad_norm": 0.4985852539539337, "learning_rate": 1.9757623442351042e-05, "loss": 0.0333, "step": 5700 }, { "epoch": 0.12227278526772424, "grad_norm": 1.6267385482788086, "learning_rate": 1.9755496959128996e-05, "loss": 0.0343, "step": 5750 }, { "epoch": 0.12333602687874792, "grad_norm": 1.5325751304626465, "learning_rate": 1.9753370475906946e-05, "loss": 0.0336, "step": 5800 }, { "epoch": 0.12333602687874792, "eval_f1": 0.6155912359393022, "eval_loss": 0.03515153005719185, "eval_precision": 0.6937255825551596, "eval_recall": 0.5532757677709889, "eval_runtime": 247.256, "eval_samples_per_second": 380.375, "eval_steps_per_second": 23.777, "step": 5800 }, { "epoch": 0.12439926848977162, "grad_norm": 1.3889185190200806, "learning_rate": 1.97512439926849e-05, "loss": 0.0326, "step": 5850 }, { "epoch": 0.12546251010079532, "grad_norm": 0.5719237923622131, "learning_rate": 1.974911750946285e-05, "loss": 0.0306, "step": 5900 }, { "epoch": 0.12652575171181898, "grad_norm": 0.5563014149665833, "learning_rate": 1.9746991026240804e-05, "loss": 0.0434, "step": 5950 }, { "epoch": 0.12758899332284268, "grad_norm": 0.4965690076351166, "learning_rate": 1.9744864543018758e-05, "loss": 0.0347, "step": 6000 }, { "epoch": 0.12758899332284268, "eval_f1": 0.5760647714160846, "eval_loss": 0.031172040849924088, "eval_precision": 0.843236853875062, "eval_recall": 0.4374596019026226, "eval_runtime": 246.9998, "eval_samples_per_second": 380.77, "eval_steps_per_second": 23.802, "step": 6000 }, { "epoch": 0.12865223493386638, "grad_norm": 0.4500654339790344, "learning_rate": 1.9742738059796708e-05, "loss": 0.0328, "step": 6050 }, { "epoch": 0.12971547654489007, "grad_norm": 0.9213146567344666, "learning_rate": 1.9740611576574662e-05, "loss": 0.0325, "step": 6100 }, { "epoch": 0.13077871815591374, "grad_norm": 0.3271268606185913, "learning_rate": 1.9738485093352616e-05, "loss": 0.0319, "step": 6150 }, { "epoch": 0.13184195976693744, "grad_norm": 0.8362289071083069, "learning_rate": 1.973635861013057e-05, "loss": 0.027, "step": 6200 }, { "epoch": 0.13184195976693744, "eval_f1": 0.6159448152333887, "eval_loss": 0.03375120833516121, "eval_precision": 0.7436885463725169, "eval_recall": 0.525653137102136, "eval_runtime": 246.6266, "eval_samples_per_second": 381.346, "eval_steps_per_second": 23.838, "step": 6200 }, { "epoch": 0.13290520137796114, "grad_norm": 0.6590610146522522, "learning_rate": 1.973423212690852e-05, "loss": 0.0321, "step": 6250 }, { "epoch": 0.1339684429889848, "grad_norm": 0.3309442400932312, "learning_rate": 1.9732105643686473e-05, "loss": 0.0299, "step": 6300 }, { "epoch": 0.1350316846000085, "grad_norm": 1.3381755352020264, "learning_rate": 1.9729979160464424e-05, "loss": 0.0316, "step": 6350 }, { "epoch": 0.1360949262110322, "grad_norm": 0.7886656522750854, "learning_rate": 1.9727852677242377e-05, "loss": 0.0273, "step": 6400 }, { "epoch": 0.1360949262110322, "eval_f1": 0.6116905022096355, "eval_loss": 0.03411262482404709, "eval_precision": 0.757900151878827, "eval_recall": 0.5127699421581831, "eval_runtime": 246.7094, "eval_samples_per_second": 381.218, "eval_steps_per_second": 23.83, "step": 6400 }, { "epoch": 0.1371581678220559, "grad_norm": 0.47245463728904724, "learning_rate": 1.972572619402033e-05, "loss": 0.0372, "step": 6450 }, { "epoch": 0.13822140943307956, "grad_norm": 0.8163419961929321, "learning_rate": 1.972359971079828e-05, "loss": 0.0265, "step": 6500 }, { "epoch": 0.13928465104410326, "grad_norm": 0.559586763381958, "learning_rate": 1.9721473227576235e-05, "loss": 0.0302, "step": 6550 }, { "epoch": 0.14034789265512695, "grad_norm": 0.45105987787246704, "learning_rate": 1.971934674435419e-05, "loss": 0.0312, "step": 6600 }, { "epoch": 0.14034789265512695, "eval_f1": 0.6146862199219396, "eval_loss": 0.034724559634923935, "eval_precision": 0.7131737706352564, "eval_recall": 0.540099772941946, "eval_runtime": 246.7514, "eval_samples_per_second": 381.153, "eval_steps_per_second": 23.826, "step": 6600 }, { "epoch": 0.14141113426615065, "grad_norm": 0.905709981918335, "learning_rate": 1.9717220261132143e-05, "loss": 0.031, "step": 6650 }, { "epoch": 0.14247437587717432, "grad_norm": 0.7808261513710022, "learning_rate": 1.9715093777910096e-05, "loss": 0.0282, "step": 6700 }, { "epoch": 0.14353761748819802, "grad_norm": 0.5361971855163574, "learning_rate": 1.9712967294688047e-05, "loss": 0.0339, "step": 6750 }, { "epoch": 0.1446008590992217, "grad_norm": 0.4592117965221405, "learning_rate": 1.9710840811466e-05, "loss": 0.0334, "step": 6800 }, { "epoch": 0.1446008590992217, "eval_f1": 0.5991980599505855, "eval_loss": 0.030962416902184486, "eval_precision": 0.7486812577115786, "eval_recall": 0.49947240775419727, "eval_runtime": 246.7964, "eval_samples_per_second": 381.083, "eval_steps_per_second": 23.821, "step": 6800 }, { "epoch": 0.1456641007102454, "grad_norm": 0.4033856987953186, "learning_rate": 1.970871432824395e-05, "loss": 0.0339, "step": 6850 }, { "epoch": 0.14672734232126908, "grad_norm": 0.34183260798454285, "learning_rate": 1.9706587845021904e-05, "loss": 0.0281, "step": 6900 }, { "epoch": 0.14779058393229277, "grad_norm": 1.5102756023406982, "learning_rate": 1.9704461361799855e-05, "loss": 0.0245, "step": 6950 }, { "epoch": 0.14885382554331647, "grad_norm": 1.2105891704559326, "learning_rate": 1.970233487857781e-05, "loss": 0.0301, "step": 7000 }, { "epoch": 0.14885382554331647, "eval_f1": 0.6025215927411743, "eval_loss": 0.03057522512972355, "eval_precision": 0.7784437949560914, "eval_recall": 0.49145632033409614, "eval_runtime": 246.9018, "eval_samples_per_second": 380.921, "eval_steps_per_second": 23.811, "step": 7000 }, { "epoch": 0.14991706715434014, "grad_norm": 0.5836089253425598, "learning_rate": 1.9700208395355762e-05, "loss": 0.0387, "step": 7050 }, { "epoch": 0.15098030876536384, "grad_norm": 0.32872989773750305, "learning_rate": 1.9698081912133716e-05, "loss": 0.0329, "step": 7100 }, { "epoch": 0.15204355037638753, "grad_norm": 0.6530079245567322, "learning_rate": 1.969595542891167e-05, "loss": 0.0316, "step": 7150 }, { "epoch": 0.15310679198741123, "grad_norm": 0.5003499984741211, "learning_rate": 1.969382894568962e-05, "loss": 0.0285, "step": 7200 }, { "epoch": 0.15310679198741123, "eval_f1": 0.5787710079490422, "eval_loss": 0.031062854453921318, "eval_precision": 0.7959710643893932, "eval_recall": 0.4546961234399868, "eval_runtime": 245.6715, "eval_samples_per_second": 382.828, "eval_steps_per_second": 23.93, "step": 7200 }, { "epoch": 0.1541700335984349, "grad_norm": 0.2270938754081726, "learning_rate": 1.9691702462467574e-05, "loss": 0.034, "step": 7250 }, { "epoch": 0.1552332752094586, "grad_norm": 0.35560426115989685, "learning_rate": 1.9689575979245524e-05, "loss": 0.0296, "step": 7300 }, { "epoch": 0.1562965168204823, "grad_norm": 0.47759827971458435, "learning_rate": 1.9687449496023478e-05, "loss": 0.0271, "step": 7350 }, { "epoch": 0.15735975843150599, "grad_norm": 0.7571625113487244, "learning_rate": 1.968532301280143e-05, "loss": 0.0263, "step": 7400 }, { "epoch": 0.15735975843150599, "eval_f1": 0.6204896837845161, "eval_loss": 0.03159801661968231, "eval_precision": 0.7687202011625002, "eval_recall": 0.5201838562297031, "eval_runtime": 245.8702, "eval_samples_per_second": 382.519, "eval_steps_per_second": 23.911, "step": 7400 }, { "epoch": 0.15842300004252965, "grad_norm": 0.6230514645576477, "learning_rate": 1.9683196529579382e-05, "loss": 0.0274, "step": 7450 }, { "epoch": 0.15948624165355335, "grad_norm": 0.7669976949691772, "learning_rate": 1.9681070046357336e-05, "loss": 0.037, "step": 7500 }, { "epoch": 0.16054948326457705, "grad_norm": 1.527186393737793, "learning_rate": 1.967894356313529e-05, "loss": 0.0302, "step": 7550 }, { "epoch": 0.16161272487560074, "grad_norm": 0.2846198081970215, "learning_rate": 1.9676817079913243e-05, "loss": 0.0328, "step": 7600 }, { "epoch": 0.16161272487560074, "eval_f1": 0.5986021115232827, "eval_loss": 0.02971031703054905, "eval_precision": 0.8224348699329076, "eval_recall": 0.47054046439164077, "eval_runtime": 245.8505, "eval_samples_per_second": 382.55, "eval_steps_per_second": 23.913, "step": 7600 }, { "epoch": 0.1626759664866244, "grad_norm": 0.47096365690231323, "learning_rate": 1.9674690596691193e-05, "loss": 0.0327, "step": 7650 }, { "epoch": 0.1637392080976481, "grad_norm": 1.5292922258377075, "learning_rate": 1.9672564113469147e-05, "loss": 0.0232, "step": 7700 }, { "epoch": 0.1648024497086718, "grad_norm": 2.2596628665924072, "learning_rate": 1.9670437630247097e-05, "loss": 0.0292, "step": 7750 }, { "epoch": 0.16586569131969547, "grad_norm": 0.2346176654100418, "learning_rate": 1.966831114702505e-05, "loss": 0.0307, "step": 7800 }, { "epoch": 0.16586569131969547, "eval_f1": 0.6220962099696077, "eval_loss": 0.030883532017469406, "eval_precision": 0.748622455720345, "eval_recall": 0.532155504361584, "eval_runtime": 245.6534, "eval_samples_per_second": 382.857, "eval_steps_per_second": 23.932, "step": 7800 }, { "epoch": 0.16692893293071917, "grad_norm": 0.5998397469520569, "learning_rate": 1.9666184663803005e-05, "loss": 0.0326, "step": 7850 }, { "epoch": 0.16799217454174287, "grad_norm": 0.2451263666152954, "learning_rate": 1.9664058180580955e-05, "loss": 0.0335, "step": 7900 }, { "epoch": 0.16905541615276656, "grad_norm": 0.9765504598617554, "learning_rate": 1.966193169735891e-05, "loss": 0.0287, "step": 7950 }, { "epoch": 0.17011865776379023, "grad_norm": 0.7540216445922852, "learning_rate": 1.9659805214136863e-05, "loss": 0.0329, "step": 8000 }, { "epoch": 0.17011865776379023, "eval_f1": 0.6129896358430026, "eval_loss": 0.029405973851680756, "eval_precision": 0.7707925349400979, "eval_recall": 0.508819905972537, "eval_runtime": 245.7622, "eval_samples_per_second": 382.687, "eval_steps_per_second": 23.921, "step": 8000 }, { "epoch": 0.17118189937481393, "grad_norm": 0.4953705370426178, "learning_rate": 1.9657678730914816e-05, "loss": 0.0348, "step": 8050 }, { "epoch": 0.17224514098583762, "grad_norm": 0.4945722222328186, "learning_rate": 1.9655552247692767e-05, "loss": 0.0291, "step": 8100 }, { "epoch": 0.17330838259686132, "grad_norm": 1.9098111391067505, "learning_rate": 1.965342576447072e-05, "loss": 0.0316, "step": 8150 }, { "epoch": 0.174371624207885, "grad_norm": 0.5058334469795227, "learning_rate": 1.9651299281248674e-05, "loss": 0.0295, "step": 8200 }, { "epoch": 0.174371624207885, "eval_f1": 0.6046226383882308, "eval_loss": 0.0298138614743948, "eval_precision": 0.7885360392379169, "eval_recall": 0.49027407174146925, "eval_runtime": 245.7466, "eval_samples_per_second": 382.711, "eval_steps_per_second": 23.923, "step": 8200 }, { "epoch": 0.1754348658189087, "grad_norm": 0.765110194683075, "learning_rate": 1.9649172798026624e-05, "loss": 0.0326, "step": 8250 }, { "epoch": 0.17649810742993238, "grad_norm": 0.16642601788043976, "learning_rate": 1.9647046314804578e-05, "loss": 0.029, "step": 8300 }, { "epoch": 0.17756134904095608, "grad_norm": 0.3110131621360779, "learning_rate": 1.964491983158253e-05, "loss": 0.0311, "step": 8350 }, { "epoch": 0.17862459065197975, "grad_norm": 0.5879064202308655, "learning_rate": 1.9642793348360482e-05, "loss": 0.0321, "step": 8400 }, { "epoch": 0.17862459065197975, "eval_f1": 0.6123469574367182, "eval_loss": 0.02992420829832554, "eval_precision": 0.7867517624716412, "eval_recall": 0.5012347315908701, "eval_runtime": 245.8617, "eval_samples_per_second": 382.532, "eval_steps_per_second": 23.912, "step": 8400 }, { "epoch": 0.17968783226300344, "grad_norm": 0.22766068577766418, "learning_rate": 1.9640666865138436e-05, "loss": 0.0319, "step": 8450 }, { "epoch": 0.18075107387402714, "grad_norm": 0.3313518166542053, "learning_rate": 1.963854038191639e-05, "loss": 0.0301, "step": 8500 }, { "epoch": 0.1818143154850508, "grad_norm": 0.831730842590332, "learning_rate": 1.9636413898694343e-05, "loss": 0.0273, "step": 8550 }, { "epoch": 0.1828775570960745, "grad_norm": 0.29557257890701294, "learning_rate": 1.9634287415472294e-05, "loss": 0.0266, "step": 8600 }, { "epoch": 0.1828775570960745, "eval_f1": 0.6202895104852048, "eval_loss": 0.03009197674691677, "eval_precision": 0.755262719725922, "eval_recall": 0.5262442613984495, "eval_runtime": 245.7558, "eval_samples_per_second": 382.697, "eval_steps_per_second": 23.922, "step": 8600 }, { "epoch": 0.1839407987070982, "grad_norm": 0.9557961225509644, "learning_rate": 1.9632160932250247e-05, "loss": 0.0299, "step": 8650 }, { "epoch": 0.1850040403181219, "grad_norm": 0.49195942282676697, "learning_rate": 1.9630034449028198e-05, "loss": 0.0319, "step": 8700 }, { "epoch": 0.18606728192914557, "grad_norm": 0.5787323713302612, "learning_rate": 1.962790796580615e-05, "loss": 0.0273, "step": 8750 }, { "epoch": 0.18713052354016926, "grad_norm": 0.5464428067207336, "learning_rate": 1.9625781482584102e-05, "loss": 0.0307, "step": 8800 }, { "epoch": 0.18713052354016926, "eval_f1": 0.5956851119164728, "eval_loss": 0.028990615159273148, "eval_precision": 0.8055866868836415, "eval_recall": 0.47255691643046704, "eval_runtime": 245.7492, "eval_samples_per_second": 382.707, "eval_steps_per_second": 23.923, "step": 8800 }, { "epoch": 0.18819376515119296, "grad_norm": 1.4530125856399536, "learning_rate": 1.9623654999362056e-05, "loss": 0.031, "step": 8850 }, { "epoch": 0.18925700676221666, "grad_norm": 0.5314801335334778, "learning_rate": 1.962152851614001e-05, "loss": 0.0308, "step": 8900 }, { "epoch": 0.19032024837324032, "grad_norm": 0.10746732354164124, "learning_rate": 1.9619402032917963e-05, "loss": 0.0327, "step": 8950 }, { "epoch": 0.19138348998426402, "grad_norm": 0.6953580379486084, "learning_rate": 1.9617275549695917e-05, "loss": 0.0278, "step": 9000 }, { "epoch": 0.19138348998426402, "eval_f1": 0.6090386014492196, "eval_loss": 0.03040657937526703, "eval_precision": 0.7875187297914717, "eval_recall": 0.49651126174649624, "eval_runtime": 245.8309, "eval_samples_per_second": 382.58, "eval_steps_per_second": 23.915, "step": 9000 }, { "epoch": 0.19244673159528772, "grad_norm": 1.4189220666885376, "learning_rate": 1.9615149066473867e-05, "loss": 0.0339, "step": 9050 }, { "epoch": 0.19350997320631141, "grad_norm": 1.2121318578720093, "learning_rate": 1.961302258325182e-05, "loss": 0.027, "step": 9100 }, { "epoch": 0.19457321481733508, "grad_norm": 0.6132772564888, "learning_rate": 1.961089610002977e-05, "loss": 0.0292, "step": 9150 }, { "epoch": 0.19563645642835878, "grad_norm": 0.4103599488735199, "learning_rate": 1.9608769616807725e-05, "loss": 0.0262, "step": 9200 }, { "epoch": 0.19563645642835878, "eval_f1": 0.6266044636220642, "eval_loss": 0.02957482449710369, "eval_precision": 0.7689452371769533, "eval_recall": 0.5287302981586463, "eval_runtime": 245.826, "eval_samples_per_second": 382.588, "eval_steps_per_second": 23.915, "step": 9200 }, { "epoch": 0.19669969803938248, "grad_norm": 0.413341760635376, "learning_rate": 1.9606643133585675e-05, "loss": 0.0298, "step": 9250 }, { "epoch": 0.19776293965040617, "grad_norm": 1.0433675050735474, "learning_rate": 1.960451665036363e-05, "loss": 0.0293, "step": 9300 }, { "epoch": 0.19882618126142984, "grad_norm": 0.6197942495346069, "learning_rate": 1.9602390167141583e-05, "loss": 0.0272, "step": 9350 }, { "epoch": 0.19988942287245354, "grad_norm": 1.0834375619888306, "learning_rate": 1.9600263683919536e-05, "loss": 0.0329, "step": 9400 }, { "epoch": 0.19988942287245354, "eval_f1": 0.6053146767625073, "eval_loss": 0.02858627401292324, "eval_precision": 0.8095674300253705, "eval_recall": 0.4833628895481223, "eval_runtime": 245.988, "eval_samples_per_second": 382.336, "eval_steps_per_second": 23.9, "step": 9400 }, { "epoch": 0.20095266448347723, "grad_norm": 0.35154786705970764, "learning_rate": 1.959813720069749e-05, "loss": 0.0334, "step": 9450 }, { "epoch": 0.2020159060945009, "grad_norm": 0.2093544900417328, "learning_rate": 1.959601071747544e-05, "loss": 0.0318, "step": 9500 }, { "epoch": 0.2030791477055246, "grad_norm": 6.828915119171143, "learning_rate": 1.9593884234253394e-05, "loss": 0.0393, "step": 9550 }, { "epoch": 0.2041423893165483, "grad_norm": 0.30918627977371216, "learning_rate": 1.9591757751031348e-05, "loss": 0.0314, "step": 9600 }, { "epoch": 0.2041423893165483, "eval_f1": 0.6285461192555495, "eval_loss": 0.031848061829805374, "eval_precision": 0.7216088102193445, "eval_recall": 0.5567451701829967, "eval_runtime": 246.0083, "eval_samples_per_second": 382.304, "eval_steps_per_second": 23.898, "step": 9600 }, { "epoch": 0.205205630927572, "grad_norm": 0.942303478717804, "learning_rate": 1.9589631267809298e-05, "loss": 0.0335, "step": 9650 }, { "epoch": 0.20626887253859566, "grad_norm": 0.35771986842155457, "learning_rate": 1.9587504784587252e-05, "loss": 0.0323, "step": 9700 }, { "epoch": 0.20733211414961936, "grad_norm": 0.5412462949752808, "learning_rate": 1.9585378301365202e-05, "loss": 0.0293, "step": 9750 }, { "epoch": 0.20839535576064305, "grad_norm": 0.6716434359550476, "learning_rate": 1.9583251818143156e-05, "loss": 0.0277, "step": 9800 }, { "epoch": 0.20839535576064305, "eval_f1": 0.6182483495270503, "eval_loss": 0.029412491247057915, "eval_precision": 0.8066202555279217, "eval_recall": 0.5012015844340675, "eval_runtime": 245.8615, "eval_samples_per_second": 382.532, "eval_steps_per_second": 23.912, "step": 9800 }, { "epoch": 0.20945859737166675, "grad_norm": 0.7279899716377258, "learning_rate": 1.958112533492111e-05, "loss": 0.0316, "step": 9850 }, { "epoch": 0.21052183898269042, "grad_norm": 0.48316463828086853, "learning_rate": 1.9578998851699063e-05, "loss": 0.0322, "step": 9900 }, { "epoch": 0.21158508059371411, "grad_norm": 0.32974550127983093, "learning_rate": 1.9576872368477014e-05, "loss": 0.0305, "step": 9950 }, { "epoch": 0.2126483222047378, "grad_norm": 0.5153877139091492, "learning_rate": 1.9574745885254967e-05, "loss": 0.031, "step": 10000 }, { "epoch": 0.2126483222047378, "eval_f1": 0.6150894387734773, "eval_loss": 0.02837684191763401, "eval_precision": 0.8192974876126512, "eval_recall": 0.4923678671461683, "eval_runtime": 245.9136, "eval_samples_per_second": 382.451, "eval_steps_per_second": 23.907, "step": 10000 }, { "epoch": 0.2137115638157615, "grad_norm": 0.976379930973053, "learning_rate": 1.957261940203292e-05, "loss": 0.0247, "step": 10050 }, { "epoch": 0.21477480542678518, "grad_norm": 0.3594103753566742, "learning_rate": 1.957049291881087e-05, "loss": 0.0385, "step": 10100 }, { "epoch": 0.21583804703780887, "grad_norm": 0.3341178894042969, "learning_rate": 1.9568366435588825e-05, "loss": 0.0309, "step": 10150 }, { "epoch": 0.21690128864883257, "grad_norm": 0.2684437334537506, "learning_rate": 1.9566239952366775e-05, "loss": 0.0287, "step": 10200 }, { "epoch": 0.21690128864883257, "eval_f1": 0.6090175107830859, "eval_loss": 0.028910333290696144, "eval_precision": 0.8276372609499523, "eval_recall": 0.4817607769693288, "eval_runtime": 245.7528, "eval_samples_per_second": 382.702, "eval_steps_per_second": 23.922, "step": 10200 }, { "epoch": 0.21796453025985624, "grad_norm": 0.3776201605796814, "learning_rate": 1.956411346914473e-05, "loss": 0.0317, "step": 10250 }, { "epoch": 0.21902777187087993, "grad_norm": 0.14755824208259583, "learning_rate": 1.9561986985922683e-05, "loss": 0.0298, "step": 10300 }, { "epoch": 0.22009101348190363, "grad_norm": 0.47210556268692017, "learning_rate": 1.9559860502700637e-05, "loss": 0.0259, "step": 10350 }, { "epoch": 0.22115425509292733, "grad_norm": 0.19691751897335052, "learning_rate": 1.9557734019478587e-05, "loss": 0.0288, "step": 10400 }, { "epoch": 0.22115425509292733, "eval_f1": 0.6252953839995974, "eval_loss": 0.02979074977338314, "eval_precision": 0.7625528667280852, "eval_recall": 0.5299125467512731, "eval_runtime": 245.7033, "eval_samples_per_second": 382.779, "eval_steps_per_second": 23.927, "step": 10400 }, { "epoch": 0.222217496703951, "grad_norm": 0.7690098881721497, "learning_rate": 1.955560753625654e-05, "loss": 0.0333, "step": 10450 }, { "epoch": 0.2232807383149747, "grad_norm": 0.3273935914039612, "learning_rate": 1.9553481053034494e-05, "loss": 0.032, "step": 10500 }, { "epoch": 0.2243439799259984, "grad_norm": 0.3377140462398529, "learning_rate": 1.9551354569812445e-05, "loss": 0.025, "step": 10550 }, { "epoch": 0.22540722153702208, "grad_norm": 1.14713716506958, "learning_rate": 1.95492280865904e-05, "loss": 0.0326, "step": 10600 }, { "epoch": 0.22540722153702208, "eval_f1": 0.6182330978841745, "eval_loss": 0.028236912563443184, "eval_precision": 0.8094607886970572, "eval_recall": 0.5000911546811796, "eval_runtime": 245.7209, "eval_samples_per_second": 382.751, "eval_steps_per_second": 23.926, "step": 10600 }, { "epoch": 0.22647046314804575, "grad_norm": 0.23720699548721313, "learning_rate": 1.954710160336835e-05, "loss": 0.0303, "step": 10650 }, { "epoch": 0.22753370475906945, "grad_norm": 0.29027849435806274, "learning_rate": 1.9544975120146303e-05, "loss": 0.0313, "step": 10700 }, { "epoch": 0.22859694637009315, "grad_norm": 0.8456804752349854, "learning_rate": 1.9542848636924256e-05, "loss": 0.03, "step": 10750 }, { "epoch": 0.22966018798111684, "grad_norm": 0.5251879692077637, "learning_rate": 1.954072215370221e-05, "loss": 0.0293, "step": 10800 }, { "epoch": 0.22966018798111684, "eval_f1": 0.6289209498237237, "eval_loss": 0.029881522059440613, "eval_precision": 0.7889270472125243, "eval_recall": 0.5228743004568494, "eval_runtime": 245.7839, "eval_samples_per_second": 382.653, "eval_steps_per_second": 23.919, "step": 10800 }, { "epoch": 0.2307234295921405, "grad_norm": 2.121523141860962, "learning_rate": 1.953859567048016e-05, "loss": 0.0285, "step": 10850 }, { "epoch": 0.2317866712031642, "grad_norm": 0.6865382194519043, "learning_rate": 1.9536469187258114e-05, "loss": 0.0238, "step": 10900 }, { "epoch": 0.2328499128141879, "grad_norm": 0.6958354711532593, "learning_rate": 1.9534342704036068e-05, "loss": 0.0257, "step": 10950 }, { "epoch": 0.23391315442521157, "grad_norm": 3.2178001403808594, "learning_rate": 1.953221622081402e-05, "loss": 0.0318, "step": 11000 }, { "epoch": 0.23391315442521157, "eval_f1": 0.624071522909666, "eval_loss": 0.028833257034420967, "eval_precision": 0.7713017246285445, "eval_recall": 0.524039975471075, "eval_runtime": 245.6036, "eval_samples_per_second": 382.934, "eval_steps_per_second": 23.937, "step": 11000 }, { "epoch": 0.23497639603623527, "grad_norm": 0.3490201532840729, "learning_rate": 1.9530089737591972e-05, "loss": 0.0264, "step": 11050 }, { "epoch": 0.23603963764725897, "grad_norm": 0.3824993073940277, "learning_rate": 1.9527963254369922e-05, "loss": 0.0311, "step": 11100 }, { "epoch": 0.23710287925828266, "grad_norm": 0.16883927583694458, "learning_rate": 1.9525836771147876e-05, "loss": 0.0266, "step": 11150 }, { "epoch": 0.23816612086930633, "grad_norm": 0.3603559732437134, "learning_rate": 1.952371028792583e-05, "loss": 0.0249, "step": 11200 }, { "epoch": 0.23816612086930633, "eval_f1": 0.630373490677565, "eval_loss": 0.028377104550600052, "eval_precision": 0.7831696893634879, "eval_recall": 0.5274651816740127, "eval_runtime": 245.7549, "eval_samples_per_second": 382.698, "eval_steps_per_second": 23.922, "step": 11200 }, { "epoch": 0.23922936248033003, "grad_norm": 0.3131977915763855, "learning_rate": 1.9521583804703783e-05, "loss": 0.0315, "step": 11250 }, { "epoch": 0.24029260409135372, "grad_norm": 1.3629671335220337, "learning_rate": 1.9519457321481734e-05, "loss": 0.0343, "step": 11300 }, { "epoch": 0.24135584570237742, "grad_norm": 0.5245139002799988, "learning_rate": 1.9517330838259687e-05, "loss": 0.0327, "step": 11350 }, { "epoch": 0.2424190873134011, "grad_norm": 0.569464921951294, "learning_rate": 1.951520435503764e-05, "loss": 0.0295, "step": 11400 }, { "epoch": 0.2424190873134011, "eval_f1": 0.6052424400566557, "eval_loss": 0.028101008385419846, "eval_precision": 0.8250705783609773, "eval_recall": 0.4779101822540907, "eval_runtime": 245.9116, "eval_samples_per_second": 382.454, "eval_steps_per_second": 23.907, "step": 11400 }, { "epoch": 0.24348232892442478, "grad_norm": 0.249623641371727, "learning_rate": 1.9513077871815595e-05, "loss": 0.0275, "step": 11450 }, { "epoch": 0.24454557053544848, "grad_norm": 0.3985916078090668, "learning_rate": 1.9510951388593545e-05, "loss": 0.0287, "step": 11500 }, { "epoch": 0.24560881214647218, "grad_norm": 0.8584663271903992, "learning_rate": 1.95088249053715e-05, "loss": 0.0347, "step": 11550 }, { "epoch": 0.24667205375749585, "grad_norm": 0.8186374306678772, "learning_rate": 1.950669842214945e-05, "loss": 0.0308, "step": 11600 }, { "epoch": 0.24667205375749585, "eval_f1": 0.6298576414353102, "eval_loss": 0.02818148396909237, "eval_precision": 0.7622208621690157, "eval_recall": 0.5366635176867407, "eval_runtime": 245.8472, "eval_samples_per_second": 382.555, "eval_steps_per_second": 23.913, "step": 11600 }, { "epoch": 0.24773529536851954, "grad_norm": 0.4183613657951355, "learning_rate": 1.9504571938927403e-05, "loss": 0.0257, "step": 11650 }, { "epoch": 0.24879853697954324, "grad_norm": 0.440655916929245, "learning_rate": 1.9502445455705357e-05, "loss": 0.0322, "step": 11700 }, { "epoch": 0.2498617785905669, "grad_norm": 0.20508913695812225, "learning_rate": 1.950031897248331e-05, "loss": 0.0348, "step": 11750 }, { "epoch": 0.25092502020159063, "grad_norm": 0.40449175238609314, "learning_rate": 1.949819248926126e-05, "loss": 0.0219, "step": 11800 }, { "epoch": 0.25092502020159063, "eval_f1": 0.629154240745967, "eval_loss": 0.028991786763072014, "eval_precision": 0.7990284075923056, "eval_recall": 0.5188469209053307, "eval_runtime": 246.0813, "eval_samples_per_second": 382.191, "eval_steps_per_second": 23.89, "step": 11800 }, { "epoch": 0.2519882618126143, "grad_norm": 0.2179061770439148, "learning_rate": 1.9496066006039214e-05, "loss": 0.0236, "step": 11850 }, { "epoch": 0.25305150342363797, "grad_norm": 0.33414217829704285, "learning_rate": 1.9493939522817168e-05, "loss": 0.0385, "step": 11900 }, { "epoch": 0.2541147450346617, "grad_norm": 1.082217812538147, "learning_rate": 1.949181303959512e-05, "loss": 0.0317, "step": 11950 }, { "epoch": 0.25517798664568536, "grad_norm": 0.29014524817466736, "learning_rate": 1.9489686556373072e-05, "loss": 0.0246, "step": 12000 }, { "epoch": 0.25517798664568536, "eval_f1": 0.6185736984039693, "eval_loss": 0.030172038823366165, "eval_precision": 0.7865045945069824, "eval_recall": 0.509736977310743, "eval_runtime": 245.7644, "eval_samples_per_second": 382.684, "eval_steps_per_second": 23.921, "step": 12000 }, { "epoch": 0.25624122825670903, "grad_norm": 0.44151216745376587, "learning_rate": 1.9487560073151022e-05, "loss": 0.0347, "step": 12050 }, { "epoch": 0.25730446986773275, "grad_norm": 1.1004785299301147, "learning_rate": 1.9485433589928976e-05, "loss": 0.0243, "step": 12100 }, { "epoch": 0.2583677114787564, "grad_norm": 0.3581538498401642, "learning_rate": 1.948330710670693e-05, "loss": 0.0291, "step": 12150 }, { "epoch": 0.25943095308978015, "grad_norm": 0.508643627166748, "learning_rate": 1.9481180623484884e-05, "loss": 0.0247, "step": 12200 }, { "epoch": 0.25943095308978015, "eval_f1": 0.6332474755848517, "eval_loss": 0.030749084427952766, "eval_precision": 0.8009346110293565, "eval_recall": 0.5236201114849084, "eval_runtime": 245.7553, "eval_samples_per_second": 382.698, "eval_steps_per_second": 23.922, "step": 12200 }, { "epoch": 0.2604941947008038, "grad_norm": 0.6131979823112488, "learning_rate": 1.9479054140262834e-05, "loss": 0.0288, "step": 12250 }, { "epoch": 0.2615574363118275, "grad_norm": 0.9069925546646118, "learning_rate": 1.9476927657040788e-05, "loss": 0.0293, "step": 12300 }, { "epoch": 0.2626206779228512, "grad_norm": 0.760178804397583, "learning_rate": 1.947480117381874e-05, "loss": 0.031, "step": 12350 }, { "epoch": 0.2636839195338749, "grad_norm": 1.7453911304473877, "learning_rate": 1.9472674690596695e-05, "loss": 0.029, "step": 12400 }, { "epoch": 0.2636839195338749, "eval_f1": 0.6124945892984816, "eval_loss": 0.028749842196702957, "eval_precision": 0.8306702615769519, "eval_recall": 0.48508654170185866, "eval_runtime": 245.8174, "eval_samples_per_second": 382.601, "eval_steps_per_second": 23.916, "step": 12400 }, { "epoch": 0.26474716114489855, "grad_norm": 0.2270258069038391, "learning_rate": 1.9470548207374645e-05, "loss": 0.0255, "step": 12450 }, { "epoch": 0.26581040275592227, "grad_norm": 0.6491633653640747, "learning_rate": 1.9468421724152596e-05, "loss": 0.0289, "step": 12500 }, { "epoch": 0.26687364436694594, "grad_norm": 0.49507829546928406, "learning_rate": 1.946629524093055e-05, "loss": 0.0291, "step": 12550 }, { "epoch": 0.2679368859779696, "grad_norm": 1.8073065280914307, "learning_rate": 1.9464168757708503e-05, "loss": 0.0274, "step": 12600 }, { "epoch": 0.2679368859779696, "eval_f1": 0.6316969196806489, "eval_loss": 0.028152622282505035, "eval_precision": 0.7900534156132187, "eval_recall": 0.5262221632939144, "eval_runtime": 245.672, "eval_samples_per_second": 382.827, "eval_steps_per_second": 23.93, "step": 12600 }, { "epoch": 0.26900012758899333, "grad_norm": 0.3526266813278198, "learning_rate": 1.9462042274486457e-05, "loss": 0.0299, "step": 12650 }, { "epoch": 0.270063369200017, "grad_norm": 0.07308772206306458, "learning_rate": 1.9459915791264407e-05, "loss": 0.0314, "step": 12700 }, { "epoch": 0.2711266108110407, "grad_norm": 2.293999195098877, "learning_rate": 1.945778930804236e-05, "loss": 0.0284, "step": 12750 }, { "epoch": 0.2721898524220644, "grad_norm": 1.1089814901351929, "learning_rate": 1.9455662824820315e-05, "loss": 0.0311, "step": 12800 }, { "epoch": 0.2721898524220644, "eval_f1": 0.6170825583828248, "eval_loss": 0.029093077406287193, "eval_precision": 0.8087595163456508, "eval_recall": 0.49885366082721494, "eval_runtime": 245.7041, "eval_samples_per_second": 382.777, "eval_steps_per_second": 23.927, "step": 12800 }, { "epoch": 0.27325309403308806, "grad_norm": 0.5302107334136963, "learning_rate": 1.945353634159827e-05, "loss": 0.0275, "step": 12850 }, { "epoch": 0.2743163356441118, "grad_norm": 0.5255433320999146, "learning_rate": 1.945140985837622e-05, "loss": 0.0259, "step": 12900 }, { "epoch": 0.27537957725513545, "grad_norm": 0.9265036582946777, "learning_rate": 1.9449283375154173e-05, "loss": 0.0262, "step": 12950 }, { "epoch": 0.2764428188661591, "grad_norm": 0.5450620055198669, "learning_rate": 1.9447156891932123e-05, "loss": 0.0331, "step": 13000 }, { "epoch": 0.2764428188661591, "eval_f1": 0.6222867937986357, "eval_loss": 0.02838023006916046, "eval_precision": 0.8162860781745371, "eval_recall": 0.5027926479605934, "eval_runtime": 245.8248, "eval_samples_per_second": 382.59, "eval_steps_per_second": 23.915, "step": 13000 }, { "epoch": 0.27750606047718285, "grad_norm": 0.844329297542572, "learning_rate": 1.9445030408710077e-05, "loss": 0.0324, "step": 13050 }, { "epoch": 0.2785693020882065, "grad_norm": 0.44934821128845215, "learning_rate": 1.944290392548803e-05, "loss": 0.0269, "step": 13100 }, { "epoch": 0.27963254369923024, "grad_norm": 1.1767152547836304, "learning_rate": 1.944077744226598e-05, "loss": 0.0298, "step": 13150 }, { "epoch": 0.2806957853102539, "grad_norm": 1.3480068445205688, "learning_rate": 1.9438650959043934e-05, "loss": 0.0282, "step": 13200 }, { "epoch": 0.2806957853102539, "eval_f1": 0.631924007431982, "eval_loss": 0.027499746531248093, "eval_precision": 0.8059560833097038, "eval_recall": 0.519703222456065, "eval_runtime": 245.7338, "eval_samples_per_second": 382.731, "eval_steps_per_second": 23.924, "step": 13200 }, { "epoch": 0.2817590269212776, "grad_norm": 0.48815545439720154, "learning_rate": 1.9436524475821888e-05, "loss": 0.0281, "step": 13250 }, { "epoch": 0.2828222685323013, "grad_norm": 1.1839730739593506, "learning_rate": 1.9434397992599842e-05, "loss": 0.0311, "step": 13300 }, { "epoch": 0.28388551014332497, "grad_norm": 0.6188672780990601, "learning_rate": 1.9432271509377792e-05, "loss": 0.0323, "step": 13350 }, { "epoch": 0.28494875175434864, "grad_norm": 0.46260952949523926, "learning_rate": 1.9430145026155746e-05, "loss": 0.0302, "step": 13400 }, { "epoch": 0.28494875175434864, "eval_f1": 0.633232790802657, "eval_loss": 0.028365958482027054, "eval_precision": 0.7586539351494186, "eval_recall": 0.5433979150438071, "eval_runtime": 245.91, "eval_samples_per_second": 382.457, "eval_steps_per_second": 23.907, "step": 13400 }, { "epoch": 0.28601199336537236, "grad_norm": 0.6658585667610168, "learning_rate": 1.9428018542933696e-05, "loss": 0.0269, "step": 13450 }, { "epoch": 0.28707523497639603, "grad_norm": 0.4257417917251587, "learning_rate": 1.942589205971165e-05, "loss": 0.0301, "step": 13500 }, { "epoch": 0.2881384765874197, "grad_norm": 0.5339240431785583, "learning_rate": 1.9423765576489604e-05, "loss": 0.0293, "step": 13550 }, { "epoch": 0.2892017181984434, "grad_norm": 0.6730823516845703, "learning_rate": 1.9421639093267554e-05, "loss": 0.0258, "step": 13600 }, { "epoch": 0.2892017181984434, "eval_f1": 0.6238518778068882, "eval_loss": 0.028193354606628418, "eval_precision": 0.829788404919716, "eval_recall": 0.49980940384835726, "eval_runtime": 245.2741, "eval_samples_per_second": 383.449, "eval_steps_per_second": 23.969, "step": 13600 }, { "epoch": 0.2902649598094671, "grad_norm": 0.6491748094558716, "learning_rate": 1.9419512610045508e-05, "loss": 0.0304, "step": 13650 }, { "epoch": 0.2913282014204908, "grad_norm": 0.6707431674003601, "learning_rate": 1.941738612682346e-05, "loss": 0.0319, "step": 13700 }, { "epoch": 0.2923914430315145, "grad_norm": 0.4890502393245697, "learning_rate": 1.9415259643601415e-05, "loss": 0.0282, "step": 13750 }, { "epoch": 0.29345468464253815, "grad_norm": 3.067202091217041, "learning_rate": 1.941313316037937e-05, "loss": 0.0294, "step": 13800 }, { "epoch": 0.29345468464253815, "eval_f1": 0.6012920454651126, "eval_loss": 0.02957030199468136, "eval_precision": 0.8377002102021259, "eval_recall": 0.46894940086511483, "eval_runtime": 245.3084, "eval_samples_per_second": 383.395, "eval_steps_per_second": 23.966, "step": 13800 }, { "epoch": 0.2945179262535619, "grad_norm": 0.23869837820529938, "learning_rate": 1.941100667715732e-05, "loss": 0.0281, "step": 13850 }, { "epoch": 0.29558116786458555, "grad_norm": 0.3491346538066864, "learning_rate": 1.940888019393527e-05, "loss": 0.0252, "step": 13900 }, { "epoch": 0.2966444094756092, "grad_norm": 0.49291735887527466, "learning_rate": 1.9406753710713223e-05, "loss": 0.0259, "step": 13950 }, { "epoch": 0.29770765108663294, "grad_norm": 1.072424292564392, "learning_rate": 1.9404627227491177e-05, "loss": 0.0288, "step": 14000 }, { "epoch": 0.29770765108663294, "eval_f1": 0.6334365378248968, "eval_loss": 0.028837284073233604, "eval_precision": 0.7783306762000758, "eval_recall": 0.5340227941947984, "eval_runtime": 245.61, "eval_samples_per_second": 382.924, "eval_steps_per_second": 23.936, "step": 14000 }, { "epoch": 0.2987708926976566, "grad_norm": 0.45036566257476807, "learning_rate": 1.9402500744269127e-05, "loss": 0.0282, "step": 14050 }, { "epoch": 0.2998341343086803, "grad_norm": 1.255637288093567, "learning_rate": 1.940037426104708e-05, "loss": 0.0306, "step": 14100 }, { "epoch": 0.300897375919704, "grad_norm": 0.23964515328407288, "learning_rate": 1.9398247777825035e-05, "loss": 0.0296, "step": 14150 }, { "epoch": 0.30196061753072767, "grad_norm": 0.20891901850700378, "learning_rate": 1.939612129460299e-05, "loss": 0.0294, "step": 14200 }, { "epoch": 0.30196061753072767, "eval_f1": 0.6188074009741564, "eval_loss": 0.027162963524460793, "eval_precision": 0.7899641331021615, "eval_recall": 0.5086099739794537, "eval_runtime": 245.4378, "eval_samples_per_second": 383.193, "eval_steps_per_second": 23.953, "step": 14200 }, { "epoch": 0.3030238591417514, "grad_norm": 4.989631175994873, "learning_rate": 1.9393994811380942e-05, "loss": 0.036, "step": 14250 }, { "epoch": 0.30408710075277506, "grad_norm": 0.24616652727127075, "learning_rate": 1.9391868328158892e-05, "loss": 0.0277, "step": 14300 }, { "epoch": 0.30515034236379873, "grad_norm": 0.6097586154937744, "learning_rate": 1.9389741844936846e-05, "loss": 0.0272, "step": 14350 }, { "epoch": 0.30621358397482246, "grad_norm": 0.38876038789749146, "learning_rate": 1.9387615361714797e-05, "loss": 0.0319, "step": 14400 }, { "epoch": 0.30621358397482246, "eval_f1": 0.6018414077291476, "eval_loss": 0.027560904622077942, "eval_precision": 0.8353380950511327, "eval_recall": 0.4703636795553601, "eval_runtime": 245.8042, "eval_samples_per_second": 382.622, "eval_steps_per_second": 23.917, "step": 14400 }, { "epoch": 0.3072768255858461, "grad_norm": 1.170117974281311, "learning_rate": 1.938548887849275e-05, "loss": 0.0268, "step": 14450 }, { "epoch": 0.3083400671968698, "grad_norm": 0.6349595189094543, "learning_rate": 1.9383362395270704e-05, "loss": 0.0273, "step": 14500 }, { "epoch": 0.3094033088078935, "grad_norm": 0.24487897753715515, "learning_rate": 1.9381235912048654e-05, "loss": 0.0267, "step": 14550 }, { "epoch": 0.3104665504189172, "grad_norm": 0.8793773651123047, "learning_rate": 1.9379109428826608e-05, "loss": 0.0234, "step": 14600 }, { "epoch": 0.3104665504189172, "eval_f1": 0.6328600224090446, "eval_loss": 0.028049523010849953, "eval_precision": 0.7817549617761833, "eval_recall": 0.5316085762743407, "eval_runtime": 245.8199, "eval_samples_per_second": 382.597, "eval_steps_per_second": 23.916, "step": 14600 }, { "epoch": 0.3115297920299409, "grad_norm": 1.2061700820922852, "learning_rate": 1.9376982945604562e-05, "loss": 0.0287, "step": 14650 }, { "epoch": 0.3125930336409646, "grad_norm": 1.2123327255249023, "learning_rate": 1.9374856462382515e-05, "loss": 0.0347, "step": 14700 }, { "epoch": 0.31365627525198825, "grad_norm": 0.4610384404659271, "learning_rate": 1.9372729979160466e-05, "loss": 0.0265, "step": 14750 }, { "epoch": 0.31471951686301197, "grad_norm": 0.3847057819366455, "learning_rate": 1.937060349593842e-05, "loss": 0.0253, "step": 14800 }, { "epoch": 0.31471951686301197, "eval_f1": 0.6369062180945417, "eval_loss": 0.027722524479031563, "eval_precision": 0.7666327157373624, "eval_recall": 0.5447293258420458, "eval_runtime": 245.9161, "eval_samples_per_second": 382.448, "eval_steps_per_second": 23.907, "step": 14800 }, { "epoch": 0.31578275847403564, "grad_norm": 0.1726764738559723, "learning_rate": 1.936847701271637e-05, "loss": 0.0252, "step": 14850 }, { "epoch": 0.3168460000850593, "grad_norm": 0.7545455694198608, "learning_rate": 1.9366350529494324e-05, "loss": 0.0285, "step": 14900 }, { "epoch": 0.31790924169608303, "grad_norm": 0.5547517538070679, "learning_rate": 1.9364224046272277e-05, "loss": 0.0272, "step": 14950 }, { "epoch": 0.3189724833071067, "grad_norm": 1.8286268711090088, "learning_rate": 1.9362097563050228e-05, "loss": 0.0344, "step": 15000 }, { "epoch": 0.3189724833071067, "eval_f1": 0.6363326468438109, "eval_loss": 0.02822258323431015, "eval_precision": 0.7821470292043681, "eval_recall": 0.536343095170982, "eval_runtime": 246.0157, "eval_samples_per_second": 382.293, "eval_steps_per_second": 23.897, "step": 15000 }, { "epoch": 0.32003572491813037, "grad_norm": 0.6495918035507202, "learning_rate": 1.935997107982818e-05, "loss": 0.0258, "step": 15050 }, { "epoch": 0.3210989665291541, "grad_norm": 0.5396769642829895, "learning_rate": 1.9357844596606135e-05, "loss": 0.0302, "step": 15100 }, { "epoch": 0.32216220814017776, "grad_norm": 0.35984939336776733, "learning_rate": 1.935571811338409e-05, "loss": 0.0304, "step": 15150 }, { "epoch": 0.3232254497512015, "grad_norm": 0.8706404566764832, "learning_rate": 1.935359163016204e-05, "loss": 0.0261, "step": 15200 }, { "epoch": 0.3232254497512015, "eval_f1": 0.6298232332954582, "eval_loss": 0.02841038815677166, "eval_precision": 0.7749094166349975, "eval_recall": 0.5304981465214528, "eval_runtime": 245.7084, "eval_samples_per_second": 382.771, "eval_steps_per_second": 23.927, "step": 15200 }, { "epoch": 0.32428869136222516, "grad_norm": 0.7459996938705444, "learning_rate": 1.9351465146939993e-05, "loss": 0.0246, "step": 15250 }, { "epoch": 0.3253519329732488, "grad_norm": 0.48958104848861694, "learning_rate": 1.9349338663717943e-05, "loss": 0.0256, "step": 15300 }, { "epoch": 0.32641517458427255, "grad_norm": 0.4598415195941925, "learning_rate": 1.9347212180495897e-05, "loss": 0.03, "step": 15350 }, { "epoch": 0.3274784161952962, "grad_norm": 0.27689507603645325, "learning_rate": 1.934508569727385e-05, "loss": 0.0301, "step": 15400 }, { "epoch": 0.3274784161952962, "eval_f1": 0.621963612889917, "eval_loss": 0.02799953706562519, "eval_precision": 0.8044303409100165, "eval_recall": 0.5069691897177239, "eval_runtime": 245.6748, "eval_samples_per_second": 382.823, "eval_steps_per_second": 23.93, "step": 15400 }, { "epoch": 0.3285416578063199, "grad_norm": 0.22370608150959015, "learning_rate": 1.93429592140518e-05, "loss": 0.0254, "step": 15450 }, { "epoch": 0.3296048994173436, "grad_norm": 0.8554309606552124, "learning_rate": 1.9340832730829755e-05, "loss": 0.0227, "step": 15500 }, { "epoch": 0.3306681410283673, "grad_norm": 0.21012742817401886, "learning_rate": 1.933870624760771e-05, "loss": 0.0281, "step": 15550 }, { "epoch": 0.33173138263939095, "grad_norm": 0.36926451325416565, "learning_rate": 1.9336579764385662e-05, "loss": 0.0288, "step": 15600 }, { "epoch": 0.33173138263939095, "eval_f1": 0.6358746598500953, "eval_loss": 0.027018403634428978, "eval_precision": 0.7909587802571988, "eval_recall": 0.5316361989050095, "eval_runtime": 245.6583, "eval_samples_per_second": 382.849, "eval_steps_per_second": 23.932, "step": 15600 }, { "epoch": 0.3327946242504147, "grad_norm": 2.4295670986175537, "learning_rate": 1.9334453281163616e-05, "loss": 0.0263, "step": 15650 }, { "epoch": 0.33385786586143834, "grad_norm": 0.42782336473464966, "learning_rate": 1.9332326797941566e-05, "loss": 0.0275, "step": 15700 }, { "epoch": 0.33492110747246207, "grad_norm": 0.49520182609558105, "learning_rate": 1.933020031471952e-05, "loss": 0.0239, "step": 15750 }, { "epoch": 0.33598434908348573, "grad_norm": 0.44603851437568665, "learning_rate": 1.932807383149747e-05, "loss": 0.0337, "step": 15800 }, { "epoch": 0.33598434908348573, "eval_f1": 0.6125245459346518, "eval_loss": 0.028219029307365417, "eval_precision": 0.831023645898333, "eval_recall": 0.48500367380985215, "eval_runtime": 246.0432, "eval_samples_per_second": 382.25, "eval_steps_per_second": 23.894, "step": 15800 }, { "epoch": 0.3370475906945094, "grad_norm": 0.5008827447891235, "learning_rate": 1.9325947348275424e-05, "loss": 0.0314, "step": 15850 }, { "epoch": 0.3381108323055331, "grad_norm": 0.7007679343223572, "learning_rate": 1.9323820865053374e-05, "loss": 0.0261, "step": 15900 }, { "epoch": 0.3391740739165568, "grad_norm": 0.27255263924598694, "learning_rate": 1.9321694381831328e-05, "loss": 0.0274, "step": 15950 }, { "epoch": 0.34023731552758046, "grad_norm": 0.6860319375991821, "learning_rate": 1.931956789860928e-05, "loss": 0.0261, "step": 16000 }, { "epoch": 0.34023731552758046, "eval_f1": 0.6227571556177042, "eval_loss": 0.026872040703892708, "eval_precision": 0.8010738674867677, "eval_recall": 0.5093723585859141, "eval_runtime": 245.6485, "eval_samples_per_second": 382.864, "eval_steps_per_second": 23.933, "step": 16000 }, { "epoch": 0.3413005571386042, "grad_norm": 0.9416096806526184, "learning_rate": 1.9317441415387235e-05, "loss": 0.0252, "step": 16050 }, { "epoch": 0.34236379874962786, "grad_norm": 1.2772608995437622, "learning_rate": 1.931531493216519e-05, "loss": 0.0303, "step": 16100 }, { "epoch": 0.3434270403606516, "grad_norm": 0.25695914030075073, "learning_rate": 1.931318844894314e-05, "loss": 0.0264, "step": 16150 }, { "epoch": 0.34449028197167525, "grad_norm": 0.6492571234703064, "learning_rate": 1.9311061965721093e-05, "loss": 0.0292, "step": 16200 }, { "epoch": 0.34449028197167525, "eval_f1": 0.6311997344581495, "eval_loss": 0.026376111432909966, "eval_precision": 0.8056999708309049, "eval_recall": 0.5188303473269293, "eval_runtime": 245.7421, "eval_samples_per_second": 382.718, "eval_steps_per_second": 23.923, "step": 16200 }, { "epoch": 0.3455535235826989, "grad_norm": 1.3381452560424805, "learning_rate": 1.9308935482499044e-05, "loss": 0.0272, "step": 16250 }, { "epoch": 0.34661676519372264, "grad_norm": 0.6811099648475647, "learning_rate": 1.9306808999276997e-05, "loss": 0.0263, "step": 16300 }, { "epoch": 0.3476800068047463, "grad_norm": 0.2784237861633301, "learning_rate": 1.9304682516054948e-05, "loss": 0.0335, "step": 16350 }, { "epoch": 0.34874324841577, "grad_norm": 0.8139401078224182, "learning_rate": 1.93025560328329e-05, "loss": 0.0267, "step": 16400 }, { "epoch": 0.34874324841577, "eval_f1": 0.6168381210947261, "eval_loss": 0.026692023500800133, "eval_precision": 0.8235826074117061, "eval_recall": 0.49306395743902337, "eval_runtime": 245.5931, "eval_samples_per_second": 382.95, "eval_steps_per_second": 23.938, "step": 16400 }, { "epoch": 0.3498064900267937, "grad_norm": 0.5254821181297302, "learning_rate": 1.9300429549610855e-05, "loss": 0.0261, "step": 16450 }, { "epoch": 0.3508697316378174, "grad_norm": 0.21997413039207458, "learning_rate": 1.929830306638881e-05, "loss": 0.0274, "step": 16500 }, { "epoch": 0.35193297324884104, "grad_norm": 0.4705319106578827, "learning_rate": 1.9296176583166762e-05, "loss": 0.0238, "step": 16550 }, { "epoch": 0.35299621485986477, "grad_norm": 0.666955292224884, "learning_rate": 1.9294050099944713e-05, "loss": 0.0312, "step": 16600 }, { "epoch": 0.35299621485986477, "eval_f1": 0.6381112414905817, "eval_loss": 0.026699364185333252, "eval_precision": 0.7648734397072197, "eval_recall": 0.5473921474385232, "eval_runtime": 245.5512, "eval_samples_per_second": 383.016, "eval_steps_per_second": 23.942, "step": 16600 }, { "epoch": 0.35405945647088843, "grad_norm": 0.31632161140441895, "learning_rate": 1.9291923616722666e-05, "loss": 0.0278, "step": 16650 }, { "epoch": 0.35512269808191216, "grad_norm": 0.8224464654922485, "learning_rate": 1.9289797133500617e-05, "loss": 0.0288, "step": 16700 }, { "epoch": 0.3561859396929358, "grad_norm": 1.0308637619018555, "learning_rate": 1.928767065027857e-05, "loss": 0.0295, "step": 16750 }, { "epoch": 0.3572491813039595, "grad_norm": 0.24301880598068237, "learning_rate": 1.928554416705652e-05, "loss": 0.0218, "step": 16800 }, { "epoch": 0.3572491813039595, "eval_f1": 0.6015340698285238, "eval_loss": 0.027741150930523872, "eval_precision": 0.8134145652459732, "eval_recall": 0.47722514101350316, "eval_runtime": 245.5736, "eval_samples_per_second": 382.981, "eval_steps_per_second": 23.94, "step": 16800 }, { "epoch": 0.3583124229149832, "grad_norm": 1.140007495880127, "learning_rate": 1.9283417683834475e-05, "loss": 0.0302, "step": 16850 }, { "epoch": 0.3593756645260069, "grad_norm": 0.4287797510623932, "learning_rate": 1.928129120061243e-05, "loss": 0.0271, "step": 16900 }, { "epoch": 0.36043890613703056, "grad_norm": 0.5755913257598877, "learning_rate": 1.9279164717390382e-05, "loss": 0.0296, "step": 16950 }, { "epoch": 0.3615021477480543, "grad_norm": 0.6678063869476318, "learning_rate": 1.9277038234168336e-05, "loss": 0.0257, "step": 17000 }, { "epoch": 0.3615021477480543, "eval_f1": 0.6190341723853652, "eval_loss": 0.026295341551303864, "eval_precision": 0.8383835523334052, "eval_recall": 0.4906607885708332, "eval_runtime": 245.835, "eval_samples_per_second": 382.574, "eval_steps_per_second": 23.914, "step": 17000 }, { "epoch": 0.36256538935907795, "grad_norm": 0.43563616275787354, "learning_rate": 1.9274911750946286e-05, "loss": 0.0287, "step": 17050 }, { "epoch": 0.3636286309701016, "grad_norm": 0.33884409070014954, "learning_rate": 1.927278526772424e-05, "loss": 0.0268, "step": 17100 }, { "epoch": 0.36469187258112534, "grad_norm": 0.3762704133987427, "learning_rate": 1.9270658784502194e-05, "loss": 0.0259, "step": 17150 }, { "epoch": 0.365755114192149, "grad_norm": 1.19070303440094, "learning_rate": 1.9268532301280144e-05, "loss": 0.0239, "step": 17200 }, { "epoch": 0.365755114192149, "eval_f1": 0.6358388767017007, "eval_loss": 0.026374533772468567, "eval_precision": 0.7999313169554314, "eval_recall": 0.5276088193534908, "eval_runtime": 246.1064, "eval_samples_per_second": 382.152, "eval_steps_per_second": 23.888, "step": 17200 }, { "epoch": 0.36681835580317274, "grad_norm": 0.12990151345729828, "learning_rate": 1.9266405818058098e-05, "loss": 0.0253, "step": 17250 }, { "epoch": 0.3678815974141964, "grad_norm": 0.7148898839950562, "learning_rate": 1.9264279334836048e-05, "loss": 0.0326, "step": 17300 }, { "epoch": 0.3689448390252201, "grad_norm": 0.21555839478969574, "learning_rate": 1.9262152851614e-05, "loss": 0.0264, "step": 17350 }, { "epoch": 0.3700080806362438, "grad_norm": 0.47109681367874146, "learning_rate": 1.9260026368391955e-05, "loss": 0.0256, "step": 17400 }, { "epoch": 0.3700080806362438, "eval_f1": 0.6330616801394464, "eval_loss": 0.027095766738057137, "eval_precision": 0.7959916317990966, "eval_recall": 0.5254984503703904, "eval_runtime": 245.9654, "eval_samples_per_second": 382.371, "eval_steps_per_second": 23.902, "step": 17400 }, { "epoch": 0.37107132224726747, "grad_norm": 0.9633088707923889, "learning_rate": 1.925789988516991e-05, "loss": 0.029, "step": 17450 }, { "epoch": 0.37213456385829113, "grad_norm": 0.3357734978199005, "learning_rate": 1.925577340194786e-05, "loss": 0.0313, "step": 17500 }, { "epoch": 0.37319780546931486, "grad_norm": 0.5236226916313171, "learning_rate": 1.9253646918725813e-05, "loss": 0.0293, "step": 17550 }, { "epoch": 0.3742610470803385, "grad_norm": 1.0750893354415894, "learning_rate": 1.9251520435503767e-05, "loss": 0.0258, "step": 17600 }, { "epoch": 0.3742610470803385, "eval_f1": 0.6164433279948135, "eval_loss": 0.027024347335100174, "eval_precision": 0.8247236571681734, "eval_recall": 0.49215241062695125, "eval_runtime": 245.6751, "eval_samples_per_second": 382.823, "eval_steps_per_second": 23.93, "step": 17600 }, { "epoch": 0.37532428869136225, "grad_norm": 0.7233403325080872, "learning_rate": 1.9249393952281717e-05, "loss": 0.0307, "step": 17650 }, { "epoch": 0.3763875303023859, "grad_norm": 1.2922090291976929, "learning_rate": 1.924726746905967e-05, "loss": 0.0265, "step": 17700 }, { "epoch": 0.3774507719134096, "grad_norm": 0.6451268792152405, "learning_rate": 1.924514098583762e-05, "loss": 0.0249, "step": 17750 }, { "epoch": 0.3785140135244333, "grad_norm": 0.6972342729568481, "learning_rate": 1.9243014502615575e-05, "loss": 0.0247, "step": 17800 }, { "epoch": 0.3785140135244333, "eval_f1": 0.6321564128846193, "eval_loss": 0.026451049372553825, "eval_precision": 0.8135145239398686, "eval_recall": 0.5169188612846447, "eval_runtime": 245.8402, "eval_samples_per_second": 382.566, "eval_steps_per_second": 23.914, "step": 17800 }, { "epoch": 0.379577255135457, "grad_norm": 0.1880873590707779, "learning_rate": 1.924088801939353e-05, "loss": 0.0279, "step": 17850 }, { "epoch": 0.38064049674648065, "grad_norm": 1.5592268705368042, "learning_rate": 1.9238761536171482e-05, "loss": 0.0291, "step": 17900 }, { "epoch": 0.3817037383575044, "grad_norm": 0.29585105180740356, "learning_rate": 1.9236635052949433e-05, "loss": 0.0243, "step": 17950 }, { "epoch": 0.38276697996852804, "grad_norm": 0.37384310364723206, "learning_rate": 1.9234508569727386e-05, "loss": 0.0314, "step": 18000 }, { "epoch": 0.38276697996852804, "eval_f1": 0.6387013102612105, "eval_loss": 0.026904717087745667, "eval_precision": 0.7984798753397654, "eval_recall": 0.5322052250967879, "eval_runtime": 245.5813, "eval_samples_per_second": 382.969, "eval_steps_per_second": 23.939, "step": 18000 }, { "epoch": 0.3838302215795517, "grad_norm": 0.477355033159256, "learning_rate": 1.923238208650534e-05, "loss": 0.0284, "step": 18050 }, { "epoch": 0.38489346319057544, "grad_norm": 0.24813386797904968, "learning_rate": 1.923025560328329e-05, "loss": 0.026, "step": 18100 }, { "epoch": 0.3859567048015991, "grad_norm": 1.1887224912643433, "learning_rate": 1.9228129120061244e-05, "loss": 0.027, "step": 18150 }, { "epoch": 0.38701994641262283, "grad_norm": 0.39692503213882446, "learning_rate": 1.9226002636839195e-05, "loss": 0.0267, "step": 18200 }, { "epoch": 0.38701994641262283, "eval_f1": 0.6338783652529092, "eval_loss": 0.02658931352198124, "eval_precision": 0.7911180540057369, "eval_recall": 0.5287800188938502, "eval_runtime": 245.8297, "eval_samples_per_second": 382.582, "eval_steps_per_second": 23.915, "step": 18200 }, { "epoch": 0.3880831880236465, "grad_norm": 0.7639696598052979, "learning_rate": 1.9223876153617148e-05, "loss": 0.0252, "step": 18250 }, { "epoch": 0.38914642963467017, "grad_norm": 0.3552451431751251, "learning_rate": 1.9221749670395102e-05, "loss": 0.0264, "step": 18300 }, { "epoch": 0.3902096712456939, "grad_norm": 0.45400309562683105, "learning_rate": 1.9219623187173056e-05, "loss": 0.0358, "step": 18350 }, { "epoch": 0.39127291285671756, "grad_norm": 0.5257982611656189, "learning_rate": 1.9217496703951006e-05, "loss": 0.0243, "step": 18400 }, { "epoch": 0.39127291285671756, "eval_f1": 0.6425884567331157, "eval_loss": 0.027538973838090897, "eval_precision": 0.7876197730825653, "eval_recall": 0.5426631530680156, "eval_runtime": 245.6115, "eval_samples_per_second": 382.922, "eval_steps_per_second": 23.936, "step": 18400 }, { "epoch": 0.3923361544677412, "grad_norm": 0.44141244888305664, "learning_rate": 1.921537022072896e-05, "loss": 0.0255, "step": 18450 }, { "epoch": 0.39339939607876495, "grad_norm": 0.3380017876625061, "learning_rate": 1.9213243737506913e-05, "loss": 0.0249, "step": 18500 }, { "epoch": 0.3944626376897886, "grad_norm": 0.5815762877464294, "learning_rate": 1.9211117254284867e-05, "loss": 0.0288, "step": 18550 }, { "epoch": 0.39552587930081234, "grad_norm": 0.2498030662536621, "learning_rate": 1.9208990771062818e-05, "loss": 0.0288, "step": 18600 }, { "epoch": 0.39552587930081234, "eval_f1": 0.6374557563964866, "eval_loss": 0.026700599119067192, "eval_precision": 0.778200758179034, "eval_recall": 0.5398235466352574, "eval_runtime": 233.5225, "eval_samples_per_second": 402.745, "eval_steps_per_second": 12.59, "step": 18600 }, { "epoch": 0.396589120911836, "grad_norm": 0.524265468120575, "learning_rate": 1.920686428784077e-05, "loss": 0.0247, "step": 18650 }, { "epoch": 0.3976523625228597, "grad_norm": 0.304810106754303, "learning_rate": 1.920473780461872e-05, "loss": 0.0297, "step": 18700 }, { "epoch": 0.3987156041338834, "grad_norm": 0.22695355117321014, "learning_rate": 1.9202611321396675e-05, "loss": 0.0294, "step": 18750 }, { "epoch": 0.3997788457449071, "grad_norm": 0.6477736830711365, "learning_rate": 1.920048483817463e-05, "loss": 0.0301, "step": 18800 }, { "epoch": 0.3997788457449071, "eval_f1": 0.6303571199142087, "eval_loss": 0.025601865723729134, "eval_precision": 0.8191607773850866, "eval_recall": 0.5122837838584112, "eval_runtime": 231.8783, "eval_samples_per_second": 405.601, "eval_steps_per_second": 12.679, "step": 18800 }, { "epoch": 0.40084208735593074, "grad_norm": 0.37915024161338806, "learning_rate": 1.9198358354952583e-05, "loss": 0.0279, "step": 18850 }, { "epoch": 0.40190532896695447, "grad_norm": 0.6910470128059387, "learning_rate": 1.9196231871730533e-05, "loss": 0.0308, "step": 18900 }, { "epoch": 0.40296857057797814, "grad_norm": 0.14886777102947235, "learning_rate": 1.9194105388508487e-05, "loss": 0.0275, "step": 18950 }, { "epoch": 0.4040318121890018, "grad_norm": 0.6383010745048523, "learning_rate": 1.919197890528644e-05, "loss": 0.026, "step": 19000 }, { "epoch": 0.4040318121890018, "eval_f1": 0.6411230252083796, "eval_loss": 0.02541719190776348, "eval_precision": 0.8097835709527423, "eval_recall": 0.5306086370441282, "eval_runtime": 231.9041, "eval_samples_per_second": 405.555, "eval_steps_per_second": 12.678, "step": 19000 }, { "epoch": 0.40509505380002553, "grad_norm": 0.19128908216953278, "learning_rate": 1.918985242206439e-05, "loss": 0.0289, "step": 19050 }, { "epoch": 0.4061582954110492, "grad_norm": 0.39352890849113464, "learning_rate": 1.9187725938842345e-05, "loss": 0.0265, "step": 19100 }, { "epoch": 0.4072215370220729, "grad_norm": 0.3821277916431427, "learning_rate": 1.9185599455620295e-05, "loss": 0.0255, "step": 19150 }, { "epoch": 0.4082847786330966, "grad_norm": 0.3819776773452759, "learning_rate": 1.918347297239825e-05, "loss": 0.0234, "step": 19200 }, { "epoch": 0.4082847786330966, "eval_f1": 0.6331240171384451, "eval_loss": 0.025795873254537582, "eval_precision": 0.7866794654064185, "eval_recall": 0.5297247128627249, "eval_runtime": 231.8967, "eval_samples_per_second": 405.568, "eval_steps_per_second": 12.678, "step": 19200 }, { "epoch": 0.40934802024412026, "grad_norm": 0.2382950782775879, "learning_rate": 1.9181346489176202e-05, "loss": 0.0312, "step": 19250 }, { "epoch": 0.410411261855144, "grad_norm": 1.1939817667007446, "learning_rate": 1.9179220005954156e-05, "loss": 0.0374, "step": 19300 }, { "epoch": 0.41147450346616765, "grad_norm": 0.8645598888397217, "learning_rate": 1.9177093522732106e-05, "loss": 0.0288, "step": 19350 }, { "epoch": 0.4125377450771913, "grad_norm": 0.15922684967517853, "learning_rate": 1.917496703951006e-05, "loss": 0.0265, "step": 19400 }, { "epoch": 0.4125377450771913, "eval_f1": 0.6408644611484778, "eval_loss": 0.027360359206795692, "eval_precision": 0.744769109212166, "eval_recall": 0.5624022849439778, "eval_runtime": 231.9427, "eval_samples_per_second": 405.488, "eval_steps_per_second": 12.676, "step": 19400 }, { "epoch": 0.41360098668821504, "grad_norm": 0.8433595299720764, "learning_rate": 1.9172840556288014e-05, "loss": 0.0264, "step": 19450 }, { "epoch": 0.4146642282992387, "grad_norm": 0.4086915850639343, "learning_rate": 1.9170714073065964e-05, "loss": 0.0276, "step": 19500 }, { "epoch": 0.4157274699102624, "grad_norm": 1.513273000717163, "learning_rate": 1.9168587589843918e-05, "loss": 0.0269, "step": 19550 }, { "epoch": 0.4167907115212861, "grad_norm": 0.28035780787467957, "learning_rate": 1.9166461106621868e-05, "loss": 0.0314, "step": 19600 }, { "epoch": 0.4167907115212861, "eval_f1": 0.6323576842485322, "eval_loss": 0.025167785584926605, "eval_precision": 0.835774011094189, "eval_recall": 0.5085768268226512, "eval_runtime": 231.7824, "eval_samples_per_second": 405.769, "eval_steps_per_second": 12.684, "step": 19600 }, { "epoch": 0.4178539531323098, "grad_norm": 0.41457900404930115, "learning_rate": 1.9164334623399822e-05, "loss": 0.0308, "step": 19650 }, { "epoch": 0.4189171947433335, "grad_norm": 0.5549628734588623, "learning_rate": 1.9162208140177776e-05, "loss": 0.0258, "step": 19700 }, { "epoch": 0.41998043635435717, "grad_norm": 1.7342987060546875, "learning_rate": 1.916008165695573e-05, "loss": 0.0243, "step": 19750 }, { "epoch": 0.42104367796538084, "grad_norm": 0.4913669228553772, "learning_rate": 1.915795517373368e-05, "loss": 0.0257, "step": 19800 }, { "epoch": 0.42104367796538084, "eval_f1": 0.6201475998857747, "eval_loss": 0.026156138628721237, "eval_precision": 0.835679168422453, "eval_recall": 0.49299766312541815, "eval_runtime": 232.1262, "eval_samples_per_second": 405.168, "eval_steps_per_second": 12.666, "step": 19800 }, { "epoch": 0.42210691957640456, "grad_norm": 0.5984342694282532, "learning_rate": 1.9155828690511633e-05, "loss": 0.0266, "step": 19850 }, { "epoch": 0.42317016118742823, "grad_norm": 0.4704006612300873, "learning_rate": 1.9153702207289587e-05, "loss": 0.0253, "step": 19900 }, { "epoch": 0.4242334027984519, "grad_norm": 0.14387427270412445, "learning_rate": 1.915157572406754e-05, "loss": 0.0248, "step": 19950 }, { "epoch": 0.4252966444094756, "grad_norm": 0.5670871138572693, "learning_rate": 1.914944924084549e-05, "loss": 0.03, "step": 20000 }, { "epoch": 0.4252966444094756, "eval_f1": 0.6397629208798431, "eval_loss": 0.02530701458454132, "eval_precision": 0.7983950943769794, "eval_recall": 0.533718945257441, "eval_runtime": 232.2931, "eval_samples_per_second": 404.876, "eval_steps_per_second": 12.656, "step": 20000 }, { "epoch": 0.4263598860204993, "grad_norm": 0.5791890025138855, "learning_rate": 1.9147322757623445e-05, "loss": 0.0275, "step": 20050 }, { "epoch": 0.427423127631523, "grad_norm": 0.5356228351593018, "learning_rate": 1.9145196274401395e-05, "loss": 0.0235, "step": 20100 }, { "epoch": 0.4284863692425467, "grad_norm": 0.40538740158081055, "learning_rate": 1.914306979117935e-05, "loss": 0.0232, "step": 20150 }, { "epoch": 0.42954961085357035, "grad_norm": 0.509769856929779, "learning_rate": 1.9140943307957303e-05, "loss": 0.0316, "step": 20200 }, { "epoch": 0.42954961085357035, "eval_f1": 0.6266049256908991, "eval_loss": 0.02531013824045658, "eval_precision": 0.8375517674899389, "eval_recall": 0.500538641298015, "eval_runtime": 232.0968, "eval_samples_per_second": 405.219, "eval_steps_per_second": 12.667, "step": 20200 }, { "epoch": 0.4306128524645941, "grad_norm": 1.1030365228652954, "learning_rate": 1.9138816824735253e-05, "loss": 0.0247, "step": 20250 }, { "epoch": 0.43167609407561774, "grad_norm": 0.4791840612888336, "learning_rate": 1.9136690341513207e-05, "loss": 0.0245, "step": 20300 }, { "epoch": 0.4327393356866414, "grad_norm": 0.20822525024414062, "learning_rate": 1.913456385829116e-05, "loss": 0.0238, "step": 20350 }, { "epoch": 0.43380257729766514, "grad_norm": 0.3552989363670349, "learning_rate": 1.9132437375069114e-05, "loss": 0.0231, "step": 20400 }, { "epoch": 0.43380257729766514, "eval_f1": 0.6243178070412103, "eval_loss": 0.02577747032046318, "eval_precision": 0.8464775413710783, "eval_recall": 0.4945279568644726, "eval_runtime": 231.8195, "eval_samples_per_second": 405.704, "eval_steps_per_second": 12.682, "step": 20400 }, { "epoch": 0.4348658189086888, "grad_norm": 1.2767115831375122, "learning_rate": 1.9130310891847065e-05, "loss": 0.0246, "step": 20450 }, { "epoch": 0.4359290605197125, "grad_norm": 0.32669374346733093, "learning_rate": 1.9128184408625018e-05, "loss": 0.0241, "step": 20500 }, { "epoch": 0.4369923021307362, "grad_norm": 0.6649191379547119, "learning_rate": 1.912605792540297e-05, "loss": 0.0245, "step": 20550 }, { "epoch": 0.43805554374175987, "grad_norm": 1.3439606428146362, "learning_rate": 1.9123931442180922e-05, "loss": 0.0293, "step": 20600 }, { "epoch": 0.43805554374175987, "eval_f1": 0.6488589061438923, "eval_loss": 0.025827594101428986, "eval_precision": 0.7965015795946273, "eval_recall": 0.5473921474385232, "eval_runtime": 231.5812, "eval_samples_per_second": 406.121, "eval_steps_per_second": 12.695, "step": 20600 }, { "epoch": 0.4391187853527836, "grad_norm": 0.37668997049331665, "learning_rate": 1.9121804958958876e-05, "loss": 0.0246, "step": 20650 }, { "epoch": 0.44018202696380726, "grad_norm": 0.27987799048423767, "learning_rate": 1.9119678475736826e-05, "loss": 0.0236, "step": 20700 }, { "epoch": 0.44124526857483093, "grad_norm": 0.4473624527454376, "learning_rate": 1.911755199251478e-05, "loss": 0.0287, "step": 20750 }, { "epoch": 0.44230851018585465, "grad_norm": 0.665261447429657, "learning_rate": 1.9115425509292734e-05, "loss": 0.0232, "step": 20800 }, { "epoch": 0.44230851018585465, "eval_f1": 0.6402465767283726, "eval_loss": 0.02529912069439888, "eval_precision": 0.814769603742269, "eval_recall": 0.5272994458899997, "eval_runtime": 231.7599, "eval_samples_per_second": 405.808, "eval_steps_per_second": 12.686, "step": 20800 }, { "epoch": 0.4433717517968783, "grad_norm": 0.3729698061943054, "learning_rate": 1.9113299026070688e-05, "loss": 0.0248, "step": 20850 }, { "epoch": 0.444434993407902, "grad_norm": 1.498306393623352, "learning_rate": 1.9111172542848638e-05, "loss": 0.0288, "step": 20900 }, { "epoch": 0.4454982350189257, "grad_norm": 0.9777145981788635, "learning_rate": 1.910904605962659e-05, "loss": 0.0248, "step": 20950 }, { "epoch": 0.4465614766299494, "grad_norm": 0.5849199295043945, "learning_rate": 1.9106919576404542e-05, "loss": 0.027, "step": 21000 }, { "epoch": 0.4465614766299494, "eval_f1": 0.6463704653568151, "eval_loss": 0.02714352309703827, "eval_precision": 0.7615030157718681, "eval_recall": 0.5614796890796382, "eval_runtime": 231.7812, "eval_samples_per_second": 405.771, "eval_steps_per_second": 12.684, "step": 21000 }, { "epoch": 0.44762471824097305, "grad_norm": 0.5037556886672974, "learning_rate": 1.9104793093182496e-05, "loss": 0.0335, "step": 21050 }, { "epoch": 0.4486879598519968, "grad_norm": 0.3387298583984375, "learning_rate": 1.910266660996045e-05, "loss": 0.0218, "step": 21100 }, { "epoch": 0.44975120146302044, "grad_norm": 0.39483198523521423, "learning_rate": 1.91005401267384e-05, "loss": 0.0225, "step": 21150 }, { "epoch": 0.45081444307404417, "grad_norm": 1.189562201499939, "learning_rate": 1.9098413643516353e-05, "loss": 0.0281, "step": 21200 }, { "epoch": 0.45081444307404417, "eval_f1": 0.6262165340643764, "eval_loss": 0.024948321282863617, "eval_precision": 0.8180668038738034, "eval_recall": 0.50725646507668, "eval_runtime": 231.5903, "eval_samples_per_second": 406.105, "eval_steps_per_second": 12.695, "step": 21200 }, { "epoch": 0.45187768468506784, "grad_norm": 0.604805588722229, "learning_rate": 1.9096287160294307e-05, "loss": 0.0227, "step": 21250 }, { "epoch": 0.4529409262960915, "grad_norm": 0.9140902161598206, "learning_rate": 1.909416067707226e-05, "loss": 0.0227, "step": 21300 }, { "epoch": 0.45400416790711523, "grad_norm": 0.9777218103408813, "learning_rate": 1.9092034193850215e-05, "loss": 0.0239, "step": 21350 }, { "epoch": 0.4550674095181389, "grad_norm": 1.0822975635528564, "learning_rate": 1.9089907710628165e-05, "loss": 0.0271, "step": 21400 }, { "epoch": 0.4550674095181389, "eval_f1": 0.634499235646405, "eval_loss": 0.02546519972383976, "eval_precision": 0.8304645468031503, "eval_recall": 0.5133610664544964, "eval_runtime": 231.7881, "eval_samples_per_second": 405.759, "eval_steps_per_second": 12.684, "step": 21400 }, { "epoch": 0.45613065112916257, "grad_norm": 1.1917304992675781, "learning_rate": 1.908778122740612e-05, "loss": 0.03, "step": 21450 }, { "epoch": 0.4571938927401863, "grad_norm": 0.4548349976539612, "learning_rate": 1.908565474418407e-05, "loss": 0.0278, "step": 21500 }, { "epoch": 0.45825713435120996, "grad_norm": 1.22917902469635, "learning_rate": 1.9083528260962023e-05, "loss": 0.0258, "step": 21550 }, { "epoch": 0.4593203759622337, "grad_norm": 0.48739805817604065, "learning_rate": 1.9081401777739976e-05, "loss": 0.0259, "step": 21600 }, { "epoch": 0.4593203759622337, "eval_f1": 0.64627080757714, "eval_loss": 0.02555367909371853, "eval_precision": 0.7688087654865996, "eval_recall": 0.5574246868974505, "eval_runtime": 231.7523, "eval_samples_per_second": 405.821, "eval_steps_per_second": 12.686, "step": 21600 }, { "epoch": 0.46038361757325735, "grad_norm": 0.15350136160850525, "learning_rate": 1.9079275294517927e-05, "loss": 0.0285, "step": 21650 }, { "epoch": 0.461446859184281, "grad_norm": 0.5537261962890625, "learning_rate": 1.907714881129588e-05, "loss": 0.0255, "step": 21700 }, { "epoch": 0.46251010079530475, "grad_norm": 0.2929774224758148, "learning_rate": 1.9075022328073834e-05, "loss": 0.027, "step": 21750 }, { "epoch": 0.4635733424063284, "grad_norm": 0.17201334238052368, "learning_rate": 1.9072895844851788e-05, "loss": 0.0273, "step": 21800 }, { "epoch": 0.4635733424063284, "eval_f1": 0.6354863739437547, "eval_loss": 0.024812927469611168, "eval_precision": 0.7919999340336106, "eval_recall": 0.5306252106225295, "eval_runtime": 231.9601, "eval_samples_per_second": 405.458, "eval_steps_per_second": 12.675, "step": 21800 }, { "epoch": 0.4646365840173521, "grad_norm": 0.3085987865924835, "learning_rate": 1.9070769361629738e-05, "loss": 0.0288, "step": 21850 }, { "epoch": 0.4656998256283758, "grad_norm": 0.6803265810012817, "learning_rate": 1.9068642878407692e-05, "loss": 0.0259, "step": 21900 }, { "epoch": 0.4667630672393995, "grad_norm": 0.3903048634529114, "learning_rate": 1.9066516395185642e-05, "loss": 0.0341, "step": 21950 }, { "epoch": 0.46782630885042314, "grad_norm": 0.3869380056858063, "learning_rate": 1.9064389911963596e-05, "loss": 0.0265, "step": 22000 }, { "epoch": 0.46782630885042314, "eval_f1": 0.6396292033257498, "eval_loss": 0.02513076364994049, "eval_precision": 0.7972142733442082, "eval_recall": 0.5340614658777348, "eval_runtime": 231.6433, "eval_samples_per_second": 406.012, "eval_steps_per_second": 12.692, "step": 22000 }, { "epoch": 0.46888955046144687, "grad_norm": 0.4854709208011627, "learning_rate": 1.906226342874155e-05, "loss": 0.0302, "step": 22050 }, { "epoch": 0.46995279207247054, "grad_norm": 0.34303438663482666, "learning_rate": 1.90601369455195e-05, "loss": 0.0266, "step": 22100 }, { "epoch": 0.47101603368349426, "grad_norm": 0.49321454763412476, "learning_rate": 1.9058010462297454e-05, "loss": 0.0283, "step": 22150 }, { "epoch": 0.47207927529451793, "grad_norm": 0.36678585410118103, "learning_rate": 1.9055883979075407e-05, "loss": 0.0271, "step": 22200 }, { "epoch": 0.47207927529451793, "eval_f1": 0.6235128949356326, "eval_loss": 0.024758491665124893, "eval_precision": 0.8507583861846822, "eval_recall": 0.49207506726107847, "eval_runtime": 231.6576, "eval_samples_per_second": 405.987, "eval_steps_per_second": 12.691, "step": 22200 }, { "epoch": 0.4731425169055416, "grad_norm": 0.23840844631195068, "learning_rate": 1.905375749585336e-05, "loss": 0.0228, "step": 22250 }, { "epoch": 0.4742057585165653, "grad_norm": 0.5142632722854614, "learning_rate": 1.905163101263131e-05, "loss": 0.028, "step": 22300 }, { "epoch": 0.475269000127589, "grad_norm": 0.3825411796569824, "learning_rate": 1.9049504529409265e-05, "loss": 0.0284, "step": 22350 }, { "epoch": 0.47633224173861266, "grad_norm": 0.3794719874858856, "learning_rate": 1.9047378046187216e-05, "loss": 0.0255, "step": 22400 }, { "epoch": 0.47633224173861266, "eval_f1": 0.6434737941137695, "eval_loss": 0.024812132120132446, "eval_precision": 0.8106792040969851, "eval_recall": 0.5334482434768862, "eval_runtime": 231.6885, "eval_samples_per_second": 405.933, "eval_steps_per_second": 12.689, "step": 22400 }, { "epoch": 0.4773954833496364, "grad_norm": 0.7301716804504395, "learning_rate": 1.904525156296517e-05, "loss": 0.0265, "step": 22450 }, { "epoch": 0.47845872496066005, "grad_norm": 1.0359004735946655, "learning_rate": 1.9043125079743123e-05, "loss": 0.0259, "step": 22500 }, { "epoch": 0.4795219665716837, "grad_norm": 0.4555063545703888, "learning_rate": 1.9040998596521073e-05, "loss": 0.0274, "step": 22550 }, { "epoch": 0.48058520818270745, "grad_norm": 0.7127256989479065, "learning_rate": 1.9038872113299027e-05, "loss": 0.0313, "step": 22600 }, { "epoch": 0.48058520818270745, "eval_f1": 0.6440242482827816, "eval_loss": 0.02486898936331272, "eval_precision": 0.8098286842919473, "eval_recall": 0.5345752468081755, "eval_runtime": 231.7049, "eval_samples_per_second": 405.904, "eval_steps_per_second": 12.689, "step": 22600 }, { "epoch": 0.4816484497937311, "grad_norm": 0.45437130331993103, "learning_rate": 1.903674563007698e-05, "loss": 0.0241, "step": 22650 }, { "epoch": 0.48271169140475484, "grad_norm": 0.47152650356292725, "learning_rate": 1.9034619146854935e-05, "loss": 0.0246, "step": 22700 }, { "epoch": 0.4837749330157785, "grad_norm": 0.5230661630630493, "learning_rate": 1.9032492663632885e-05, "loss": 0.0318, "step": 22750 }, { "epoch": 0.4848381746268022, "grad_norm": 0.6445286870002747, "learning_rate": 1.903036618041084e-05, "loss": 0.0239, "step": 22800 }, { "epoch": 0.4848381746268022, "eval_f1": 0.6425987153571263, "eval_loss": 0.02478090487420559, "eval_precision": 0.8197713286592712, "eval_recall": 0.52839882659062, "eval_runtime": 231.6675, "eval_samples_per_second": 405.97, "eval_steps_per_second": 12.691, "step": 22800 }, { "epoch": 0.4859014162378259, "grad_norm": 1.7705179452896118, "learning_rate": 1.902823969718879e-05, "loss": 0.0316, "step": 22850 }, { "epoch": 0.48696465784884957, "grad_norm": 2.132664442062378, "learning_rate": 1.9026113213966743e-05, "loss": 0.0214, "step": 22900 }, { "epoch": 0.48802789945987324, "grad_norm": 0.7714946269989014, "learning_rate": 1.9023986730744696e-05, "loss": 0.0274, "step": 22950 }, { "epoch": 0.48909114107089696, "grad_norm": 0.8999313116073608, "learning_rate": 1.9021860247522647e-05, "loss": 0.0241, "step": 23000 }, { "epoch": 0.48909114107089696, "eval_f1": 0.6503143589017285, "eval_loss": 0.025313543155789375, "eval_precision": 0.7837452471482279, "eval_recall": 0.5557065592698479, "eval_runtime": 231.5932, "eval_samples_per_second": 406.1, "eval_steps_per_second": 12.695, "step": 23000 }, { "epoch": 0.49015438268192063, "grad_norm": 0.6610444784164429, "learning_rate": 1.90197337643006e-05, "loss": 0.0262, "step": 23050 }, { "epoch": 0.49121762429294435, "grad_norm": 0.7641913294792175, "learning_rate": 1.9017607281078554e-05, "loss": 0.0253, "step": 23100 }, { "epoch": 0.492280865903968, "grad_norm": 0.5516812205314636, "learning_rate": 1.9015480797856508e-05, "loss": 0.0262, "step": 23150 }, { "epoch": 0.4933441075149917, "grad_norm": 0.49463579058647156, "learning_rate": 1.901335431463446e-05, "loss": 0.0261, "step": 23200 }, { "epoch": 0.4933441075149917, "eval_f1": 0.6249943114892792, "eval_loss": 0.024806438013911247, "eval_precision": 0.8518010029936454, "eval_recall": 0.4935722138433303, "eval_runtime": 231.6815, "eval_samples_per_second": 405.945, "eval_steps_per_second": 12.69, "step": 23200 }, { "epoch": 0.4944073491260154, "grad_norm": 0.8895831108093262, "learning_rate": 1.9011227831412412e-05, "loss": 0.0257, "step": 23250 }, { "epoch": 0.4954705907370391, "grad_norm": 0.48920270800590515, "learning_rate": 1.9009101348190366e-05, "loss": 0.0292, "step": 23300 }, { "epoch": 0.49653383234806275, "grad_norm": 0.6251733303070068, "learning_rate": 1.9006974864968316e-05, "loss": 0.0324, "step": 23350 }, { "epoch": 0.4975970739590865, "grad_norm": 0.1994381546974182, "learning_rate": 1.900484838174627e-05, "loss": 0.0259, "step": 23400 }, { "epoch": 0.4975970739590865, "eval_f1": 0.642097388815144, "eval_loss": 0.024738729000091553, "eval_precision": 0.8341500454709264, "eval_recall": 0.5219296064879746, "eval_runtime": 231.5884, "eval_samples_per_second": 406.108, "eval_steps_per_second": 12.695, "step": 23400 }, { "epoch": 0.49866031557011015, "grad_norm": 3.1994924545288086, "learning_rate": 1.900272189852422e-05, "loss": 0.0315, "step": 23450 }, { "epoch": 0.4997235571811338, "grad_norm": 0.5864654183387756, "learning_rate": 1.9000595415302174e-05, "loss": 0.0308, "step": 23500 }, { "epoch": 0.5007867987921575, "grad_norm": 0.3681029975414276, "learning_rate": 1.8998468932080127e-05, "loss": 0.0255, "step": 23550 }, { "epoch": 0.5018500404031813, "grad_norm": 2.5087263584136963, "learning_rate": 1.899634244885808e-05, "loss": 0.0293, "step": 23600 }, { "epoch": 0.5018500404031813, "eval_f1": 0.6465845807662727, "eval_loss": 0.02454206347465515, "eval_precision": 0.8009631866785731, "eval_recall": 0.5420996514023709, "eval_runtime": 231.4678, "eval_samples_per_second": 406.32, "eval_steps_per_second": 12.702, "step": 23600 }, { "epoch": 0.5029132820142049, "grad_norm": 0.5407713055610657, "learning_rate": 1.8994215965636035e-05, "loss": 0.0216, "step": 23650 }, { "epoch": 0.5039765236252286, "grad_norm": 0.4722153842449188, "learning_rate": 1.8992089482413985e-05, "loss": 0.03, "step": 23700 }, { "epoch": 0.5050397652362523, "grad_norm": 0.48353660106658936, "learning_rate": 1.898996299919194e-05, "loss": 0.0293, "step": 23750 }, { "epoch": 0.5061030068472759, "grad_norm": 0.4492917060852051, "learning_rate": 1.898783651596989e-05, "loss": 0.0246, "step": 23800 }, { "epoch": 0.5061030068472759, "eval_f1": 0.6226829966157834, "eval_loss": 0.024457735940814018, "eval_precision": 0.8526558447794782, "eval_recall": 0.49041218489481353, "eval_runtime": 231.4752, "eval_samples_per_second": 406.307, "eval_steps_per_second": 12.701, "step": 23800 }, { "epoch": 0.5071662484582997, "grad_norm": 0.7943255305290222, "learning_rate": 1.8985710032747843e-05, "loss": 0.0336, "step": 23850 }, { "epoch": 0.5082294900693234, "grad_norm": 0.1668010801076889, "learning_rate": 1.8983583549525793e-05, "loss": 0.0258, "step": 23900 }, { "epoch": 0.509292731680347, "grad_norm": 0.4899482727050781, "learning_rate": 1.8981457066303747e-05, "loss": 0.0249, "step": 23950 }, { "epoch": 0.5103559732913707, "grad_norm": 0.2759023904800415, "learning_rate": 1.89793305830817e-05, "loss": 0.0242, "step": 24000 }, { "epoch": 0.5103559732913707, "eval_f1": 0.6461564986894439, "eval_loss": 0.024712318554520607, "eval_precision": 0.8102429962596062, "eval_recall": 0.5373375098750608, "eval_runtime": 231.5568, "eval_samples_per_second": 406.164, "eval_steps_per_second": 12.697, "step": 24000 }, { "epoch": 0.5114192149023944, "grad_norm": 0.35445791482925415, "learning_rate": 1.8977204099859654e-05, "loss": 0.0265, "step": 24050 }, { "epoch": 0.5124824565134181, "grad_norm": 0.2838018834590912, "learning_rate": 1.8975077616637608e-05, "loss": 0.0284, "step": 24100 }, { "epoch": 0.5135456981244418, "grad_norm": 0.5549056529998779, "learning_rate": 1.897295113341556e-05, "loss": 0.0238, "step": 24150 }, { "epoch": 0.5146089397354655, "grad_norm": 0.8570245504379272, "learning_rate": 1.8970824650193512e-05, "loss": 0.025, "step": 24200 }, { "epoch": 0.5146089397354655, "eval_f1": 0.6464621967489925, "eval_loss": 0.024614857509732246, "eval_precision": 0.8256732618850624, "eval_recall": 0.5311721387097728, "eval_runtime": 231.5434, "eval_samples_per_second": 406.187, "eval_steps_per_second": 12.697, "step": 24200 }, { "epoch": 0.5156721813464892, "grad_norm": 0.9104923009872437, "learning_rate": 1.8968698166971463e-05, "loss": 0.0248, "step": 24250 }, { "epoch": 0.5167354229575128, "grad_norm": 0.7304602861404419, "learning_rate": 1.8966571683749416e-05, "loss": 0.0275, "step": 24300 }, { "epoch": 0.5177986645685365, "grad_norm": 0.19481630623340607, "learning_rate": 1.896444520052737e-05, "loss": 0.0294, "step": 24350 }, { "epoch": 0.5188619061795603, "grad_norm": 0.11748157441616058, "learning_rate": 1.896231871730532e-05, "loss": 0.0294, "step": 24400 }, { "epoch": 0.5188619061795603, "eval_f1": 0.6433848172354668, "eval_loss": 0.025413619354367256, "eval_precision": 0.807885310644768, "eval_recall": 0.5345420996513728, "eval_runtime": 232.1135, "eval_samples_per_second": 405.19, "eval_steps_per_second": 12.666, "step": 24400 }, { "epoch": 0.519925147790584, "grad_norm": 0.35314643383026123, "learning_rate": 1.8960192234083274e-05, "loss": 0.0294, "step": 24450 }, { "epoch": 0.5209883894016076, "grad_norm": 0.7391543388366699, "learning_rate": 1.8958065750861228e-05, "loss": 0.0227, "step": 24500 }, { "epoch": 0.5220516310126313, "grad_norm": 0.2834475040435791, "learning_rate": 1.895593926763918e-05, "loss": 0.0272, "step": 24550 }, { "epoch": 0.523114872623655, "grad_norm": 0.15329889953136444, "learning_rate": 1.8953812784417132e-05, "loss": 0.0216, "step": 24600 }, { "epoch": 0.523114872623655, "eval_f1": 0.647731402976503, "eval_loss": 0.024424098432064056, "eval_precision": 0.8184264695956656, "eval_recall": 0.5359508538154844, "eval_runtime": 231.7976, "eval_samples_per_second": 405.742, "eval_steps_per_second": 12.683, "step": 24600 }, { "epoch": 0.5241781142346786, "grad_norm": 0.3346813917160034, "learning_rate": 1.8951686301195086e-05, "loss": 0.0235, "step": 24650 }, { "epoch": 0.5252413558457024, "grad_norm": 0.6006489992141724, "learning_rate": 1.894955981797304e-05, "loss": 0.0235, "step": 24700 }, { "epoch": 0.5263045974567261, "grad_norm": 0.6295232176780701, "learning_rate": 1.894743333475099e-05, "loss": 0.0251, "step": 24750 }, { "epoch": 0.5273678390677498, "grad_norm": 0.4825250804424286, "learning_rate": 1.8945306851528943e-05, "loss": 0.0276, "step": 24800 }, { "epoch": 0.5273678390677498, "eval_f1": 0.6367447415181618, "eval_loss": 0.024440057575702667, "eval_precision": 0.8142882958857199, "eval_recall": 0.5227638099341739, "eval_runtime": 232.0265, "eval_samples_per_second": 405.342, "eval_steps_per_second": 12.671, "step": 24800 }, { "epoch": 0.5284310806787734, "grad_norm": 0.370063841342926, "learning_rate": 1.8943180368306894e-05, "loss": 0.0277, "step": 24850 }, { "epoch": 0.5294943222897971, "grad_norm": 0.3872455656528473, "learning_rate": 1.8941053885084847e-05, "loss": 0.0217, "step": 24900 }, { "epoch": 0.5305575639008209, "grad_norm": 0.7164714932441711, "learning_rate": 1.89389274018628e-05, "loss": 0.0226, "step": 24950 }, { "epoch": 0.5316208055118445, "grad_norm": 0.5243107676506042, "learning_rate": 1.8936800918640755e-05, "loss": 0.0269, "step": 25000 }, { "epoch": 0.5316208055118445, "eval_f1": 0.6339068486784188, "eval_loss": 0.024591486901044846, "eval_precision": 0.8382474535921534, "eval_recall": 0.5096651584710039, "eval_runtime": 231.87, "eval_samples_per_second": 405.615, "eval_steps_per_second": 12.68, "step": 25000 }, { "epoch": 0.5326840471228682, "grad_norm": 0.20347608625888824, "learning_rate": 1.8934674435418705e-05, "loss": 0.0255, "step": 25050 }, { "epoch": 0.5337472887338919, "grad_norm": 0.4810965359210968, "learning_rate": 1.893254795219666e-05, "loss": 0.0248, "step": 25100 }, { "epoch": 0.5348105303449155, "grad_norm": 0.26691606640815735, "learning_rate": 1.8930421468974613e-05, "loss": 0.0251, "step": 25150 }, { "epoch": 0.5358737719559392, "grad_norm": 0.7411620020866394, "learning_rate": 1.8928294985752563e-05, "loss": 0.0335, "step": 25200 }, { "epoch": 0.5358737719559392, "eval_f1": 0.6371455786823961, "eval_loss": 0.024904271587729454, "eval_precision": 0.8462073382742387, "eval_recall": 0.5109192259033699, "eval_runtime": 232.3089, "eval_samples_per_second": 404.849, "eval_steps_per_second": 12.656, "step": 25200 }, { "epoch": 0.536937013566963, "grad_norm": 0.23887261748313904, "learning_rate": 1.8926168502530517e-05, "loss": 0.0272, "step": 25250 }, { "epoch": 0.5380002551779867, "grad_norm": 0.35524794459342957, "learning_rate": 1.8924042019308467e-05, "loss": 0.0252, "step": 25300 }, { "epoch": 0.5390634967890103, "grad_norm": 0.3107587695121765, "learning_rate": 1.892191553608642e-05, "loss": 0.0272, "step": 25350 }, { "epoch": 0.540126738400034, "grad_norm": 0.3026450574398041, "learning_rate": 1.8919789052864374e-05, "loss": 0.0256, "step": 25400 }, { "epoch": 0.540126738400034, "eval_f1": 0.6362170940884402, "eval_loss": 0.02447829209268093, "eval_precision": 0.8539165875718038, "eval_recall": 0.5069691897177239, "eval_runtime": 231.9412, "eval_samples_per_second": 405.491, "eval_steps_per_second": 12.676, "step": 25400 }, { "epoch": 0.5411899800110577, "grad_norm": 0.5823214054107666, "learning_rate": 1.8917662569642328e-05, "loss": 0.0262, "step": 25450 }, { "epoch": 0.5422532216220814, "grad_norm": 0.7562184929847717, "learning_rate": 1.891553608642028e-05, "loss": 0.0215, "step": 25500 }, { "epoch": 0.5433164632331051, "grad_norm": 0.3036539554595947, "learning_rate": 1.8913409603198232e-05, "loss": 0.0248, "step": 25550 }, { "epoch": 0.5443797048441288, "grad_norm": 0.41531044244766235, "learning_rate": 1.8911283119976186e-05, "loss": 0.0256, "step": 25600 }, { "epoch": 0.5443797048441288, "eval_f1": 0.63298374384721, "eval_loss": 0.025457868352532387, "eval_precision": 0.8271119667717679, "eval_recall": 0.5126594516355076, "eval_runtime": 231.955, "eval_samples_per_second": 405.467, "eval_steps_per_second": 12.675, "step": 25600 }, { "epoch": 0.5454429464551525, "grad_norm": 0.26660653948783875, "learning_rate": 1.8909156636754136e-05, "loss": 0.0231, "step": 25650 }, { "epoch": 0.5465061880661761, "grad_norm": 0.4170743525028229, "learning_rate": 1.890703015353209e-05, "loss": 0.0212, "step": 25700 }, { "epoch": 0.5475694296771998, "grad_norm": 0.6175497174263, "learning_rate": 1.890490367031004e-05, "loss": 0.0223, "step": 25750 }, { "epoch": 0.5486326712882236, "grad_norm": 0.44958996772766113, "learning_rate": 1.8902777187087994e-05, "loss": 0.027, "step": 25800 }, { "epoch": 0.5486326712882236, "eval_f1": 0.6349181542343625, "eval_loss": 0.024325242266058922, "eval_precision": 0.8220124681709701, "eval_recall": 0.517200612117467, "eval_runtime": 231.8633, "eval_samples_per_second": 405.627, "eval_steps_per_second": 12.68, "step": 25800 }, { "epoch": 0.5496959128992472, "grad_norm": 0.3918847441673279, "learning_rate": 1.8900650703865948e-05, "loss": 0.0259, "step": 25850 }, { "epoch": 0.5507591545102709, "grad_norm": 0.48117050528526306, "learning_rate": 1.88985242206439e-05, "loss": 0.0248, "step": 25900 }, { "epoch": 0.5518223961212946, "grad_norm": 0.4199369549751282, "learning_rate": 1.8896397737421855e-05, "loss": 0.0234, "step": 25950 }, { "epoch": 0.5528856377323182, "grad_norm": 0.5159252882003784, "learning_rate": 1.8894271254199806e-05, "loss": 0.026, "step": 26000 }, { "epoch": 0.5528856377323182, "eval_f1": 0.6241994258601885, "eval_loss": 0.024869119748473167, "eval_precision": 0.8299372106878565, "eval_recall": 0.500201645203855, "eval_runtime": 231.7235, "eval_samples_per_second": 405.872, "eval_steps_per_second": 12.688, "step": 26000 }, { "epoch": 0.553948879343342, "grad_norm": 0.4935058653354645, "learning_rate": 1.889214477097776e-05, "loss": 0.026, "step": 26050 }, { "epoch": 0.5550121209543657, "grad_norm": 0.7703174948692322, "learning_rate": 1.8890018287755713e-05, "loss": 0.0221, "step": 26100 }, { "epoch": 0.5560753625653894, "grad_norm": 0.3290540277957916, "learning_rate": 1.8887891804533663e-05, "loss": 0.0255, "step": 26150 }, { "epoch": 0.557138604176413, "grad_norm": 1.0785351991653442, "learning_rate": 1.8885765321311617e-05, "loss": 0.0231, "step": 26200 }, { "epoch": 0.557138604176413, "eval_f1": 0.6388427135903145, "eval_loss": 0.024177517741918564, "eval_precision": 0.8222239614584521, "eval_recall": 0.5223439459480074, "eval_runtime": 231.875, "eval_samples_per_second": 405.606, "eval_steps_per_second": 12.679, "step": 26200 }, { "epoch": 0.5582018457874367, "grad_norm": 0.3494855761528015, "learning_rate": 1.8883638838089567e-05, "loss": 0.0241, "step": 26250 }, { "epoch": 0.5592650873984605, "grad_norm": 1.1778019666671753, "learning_rate": 1.888151235486752e-05, "loss": 0.0282, "step": 26300 }, { "epoch": 0.5603283290094841, "grad_norm": 0.8221080899238586, "learning_rate": 1.8879385871645475e-05, "loss": 0.0241, "step": 26350 }, { "epoch": 0.5613915706205078, "grad_norm": 0.346680223941803, "learning_rate": 1.887725938842343e-05, "loss": 0.0257, "step": 26400 }, { "epoch": 0.5613915706205078, "eval_f1": 0.6499522080150222, "eval_loss": 0.024333016946911812, "eval_precision": 0.813854145023826, "eval_recall": 0.5410002707017506, "eval_runtime": 231.9799, "eval_samples_per_second": 405.423, "eval_steps_per_second": 12.674, "step": 26400 }, { "epoch": 0.5624548122315315, "grad_norm": 0.48338961601257324, "learning_rate": 1.887513290520138e-05, "loss": 0.0252, "step": 26450 }, { "epoch": 0.5635180538425552, "grad_norm": 0.6900681257247925, "learning_rate": 1.8873006421979333e-05, "loss": 0.0226, "step": 26500 }, { "epoch": 0.5645812954535788, "grad_norm": 0.6199989318847656, "learning_rate": 1.8870879938757286e-05, "loss": 0.0225, "step": 26550 }, { "epoch": 0.5656445370646026, "grad_norm": 1.4578720331192017, "learning_rate": 1.8868753455535237e-05, "loss": 0.0251, "step": 26600 }, { "epoch": 0.5656445370646026, "eval_f1": 0.6379156913775884, "eval_loss": 0.024205397814512253, "eval_precision": 0.8276576060618681, "eval_recall": 0.5189463623757385, "eval_runtime": 231.7148, "eval_samples_per_second": 405.887, "eval_steps_per_second": 12.688, "step": 26600 }, { "epoch": 0.5667077786756263, "grad_norm": 0.664051353931427, "learning_rate": 1.886662697231319e-05, "loss": 0.0241, "step": 26650 }, { "epoch": 0.5677710202866499, "grad_norm": 0.23796804249286652, "learning_rate": 1.886450048909114e-05, "loss": 0.0237, "step": 26700 }, { "epoch": 0.5688342618976736, "grad_norm": 0.28993746638298035, "learning_rate": 1.8862374005869094e-05, "loss": 0.0266, "step": 26750 }, { "epoch": 0.5698975035086973, "grad_norm": 0.1489747017621994, "learning_rate": 1.8860247522647048e-05, "loss": 0.0278, "step": 26800 }, { "epoch": 0.5698975035086973, "eval_f1": 0.640523005981798, "eval_loss": 0.024094751104712486, "eval_precision": 0.848387949019209, "eval_recall": 0.5144714962073843, "eval_runtime": 231.8311, "eval_samples_per_second": 405.683, "eval_steps_per_second": 12.682, "step": 26800 }, { "epoch": 0.5709607451197211, "grad_norm": 1.935327410697937, "learning_rate": 1.8858121039425002e-05, "loss": 0.025, "step": 26850 }, { "epoch": 0.5720239867307447, "grad_norm": 0.3005645275115967, "learning_rate": 1.8855994556202952e-05, "loss": 0.0267, "step": 26900 }, { "epoch": 0.5730872283417684, "grad_norm": 0.1870880126953125, "learning_rate": 1.8853868072980906e-05, "loss": 0.0262, "step": 26950 }, { "epoch": 0.5741504699527921, "grad_norm": 0.6408060789108276, "learning_rate": 1.885174158975886e-05, "loss": 0.0265, "step": 27000 }, { "epoch": 0.5741504699527921, "eval_f1": 0.6278788512144278, "eval_loss": 0.023864848539233208, "eval_precision": 0.8587658913538706, "eval_recall": 0.49483733032796373, "eval_runtime": 231.8676, "eval_samples_per_second": 405.619, "eval_steps_per_second": 12.68, "step": 27000 }, { "epoch": 0.5752137115638157, "grad_norm": 0.6244999766349792, "learning_rate": 1.884961510653681e-05, "loss": 0.0246, "step": 27050 }, { "epoch": 0.5762769531748394, "grad_norm": 0.7110171914100647, "learning_rate": 1.8847488623314764e-05, "loss": 0.0302, "step": 27100 }, { "epoch": 0.5773401947858632, "grad_norm": 0.6125534772872925, "learning_rate": 1.8845362140092714e-05, "loss": 0.0252, "step": 27150 }, { "epoch": 0.5784034363968868, "grad_norm": 0.7586866021156311, "learning_rate": 1.8843235656870668e-05, "loss": 0.0279, "step": 27200 }, { "epoch": 0.5784034363968868, "eval_f1": 0.6348823914336535, "eval_loss": 0.023722343146800995, "eval_precision": 0.8379578045310906, "eval_recall": 0.511035240952179, "eval_runtime": 231.8968, "eval_samples_per_second": 405.568, "eval_steps_per_second": 12.678, "step": 27200 }, { "epoch": 0.5794666780079105, "grad_norm": 0.6077877283096313, "learning_rate": 1.884110917364862e-05, "loss": 0.0329, "step": 27250 }, { "epoch": 0.5805299196189342, "grad_norm": 0.19713415205478668, "learning_rate": 1.8838982690426575e-05, "loss": 0.0247, "step": 27300 }, { "epoch": 0.5815931612299579, "grad_norm": 0.3155173659324646, "learning_rate": 1.8836856207204526e-05, "loss": 0.0256, "step": 27350 }, { "epoch": 0.5826564028409816, "grad_norm": 0.6506646275520325, "learning_rate": 1.883472972398248e-05, "loss": 0.0222, "step": 27400 }, { "epoch": 0.5826564028409816, "eval_f1": 0.6445394731170608, "eval_loss": 0.023835517466068268, "eval_precision": 0.822298606229893, "eval_recall": 0.5299733165387446, "eval_runtime": 231.846, "eval_samples_per_second": 405.657, "eval_steps_per_second": 12.681, "step": 27400 }, { "epoch": 0.5837196444520053, "grad_norm": 0.43126264214515686, "learning_rate": 1.8832603240760433e-05, "loss": 0.0231, "step": 27450 }, { "epoch": 0.584782886063029, "grad_norm": 0.2863496243953705, "learning_rate": 1.8830476757538387e-05, "loss": 0.0306, "step": 27500 }, { "epoch": 0.5858461276740526, "grad_norm": 0.573489248752594, "learning_rate": 1.8828350274316337e-05, "loss": 0.022, "step": 27550 }, { "epoch": 0.5869093692850763, "grad_norm": 0.17071206867694855, "learning_rate": 1.882622379109429e-05, "loss": 0.0243, "step": 27600 }, { "epoch": 0.5869093692850763, "eval_f1": 0.6488756911663032, "eval_loss": 0.024965867400169373, "eval_precision": 0.7823574820463026, "eval_recall": 0.5543033296318701, "eval_runtime": 231.9171, "eval_samples_per_second": 405.533, "eval_steps_per_second": 12.677, "step": 27600 }, { "epoch": 0.5879726108961, "grad_norm": 0.37096431851387024, "learning_rate": 1.882409730787224e-05, "loss": 0.0219, "step": 27650 }, { "epoch": 0.5890358525071238, "grad_norm": 0.2921299934387207, "learning_rate": 1.8821970824650195e-05, "loss": 0.025, "step": 27700 }, { "epoch": 0.5900990941181474, "grad_norm": 0.8317161202430725, "learning_rate": 1.881984434142815e-05, "loss": 0.0283, "step": 27750 }, { "epoch": 0.5911623357291711, "grad_norm": 0.5182548761367798, "learning_rate": 1.88177178582061e-05, "loss": 0.0325, "step": 27800 }, { "epoch": 0.5911623357291711, "eval_f1": 0.6404502126709326, "eval_loss": 0.02441900409758091, "eval_precision": 0.7962160031532654, "eval_recall": 0.5356580539303945, "eval_runtime": 231.7673, "eval_samples_per_second": 405.795, "eval_steps_per_second": 12.685, "step": 27800 }, { "epoch": 0.5922255773401948, "grad_norm": 0.3758147954940796, "learning_rate": 1.8815591374984053e-05, "loss": 0.0285, "step": 27850 }, { "epoch": 0.5932888189512184, "grad_norm": 0.5373481512069702, "learning_rate": 1.8813464891762006e-05, "loss": 0.0213, "step": 27900 }, { "epoch": 0.5943520605622422, "grad_norm": 0.4656599462032318, "learning_rate": 1.881133840853996e-05, "loss": 0.024, "step": 27950 }, { "epoch": 0.5954153021732659, "grad_norm": 0.4903629422187805, "learning_rate": 1.880921192531791e-05, "loss": 0.0264, "step": 28000 }, { "epoch": 0.5954153021732659, "eval_f1": 0.6427720084638049, "eval_loss": 0.023767225444316864, "eval_precision": 0.8228246347454361, "eval_recall": 0.5273712647297386, "eval_runtime": 231.5843, "eval_samples_per_second": 406.116, "eval_steps_per_second": 12.695, "step": 28000 }, { "epoch": 0.5964785437842895, "grad_norm": 0.5392729640007019, "learning_rate": 1.8807085442095864e-05, "loss": 0.0203, "step": 28050 }, { "epoch": 0.5975417853953132, "grad_norm": 0.38880109786987305, "learning_rate": 1.8804958958873814e-05, "loss": 0.0275, "step": 28100 }, { "epoch": 0.5986050270063369, "grad_norm": 0.3447709083557129, "learning_rate": 1.8802832475651768e-05, "loss": 0.0285, "step": 28150 }, { "epoch": 0.5996682686173606, "grad_norm": 0.3016016185283661, "learning_rate": 1.8800705992429722e-05, "loss": 0.0252, "step": 28200 }, { "epoch": 0.5996682686173606, "eval_f1": 0.647240798159373, "eval_loss": 0.023675519973039627, "eval_precision": 0.8120705756786729, "eval_recall": 0.5380336001679158, "eval_runtime": 232.0055, "eval_samples_per_second": 405.378, "eval_steps_per_second": 12.672, "step": 28200 }, { "epoch": 0.6007315102283843, "grad_norm": 0.3209368884563446, "learning_rate": 1.8798579509207672e-05, "loss": 0.0217, "step": 28250 }, { "epoch": 0.601794751839408, "grad_norm": 0.5245710015296936, "learning_rate": 1.8796453025985626e-05, "loss": 0.0284, "step": 28300 }, { "epoch": 0.6028579934504317, "grad_norm": 0.38217225670814514, "learning_rate": 1.879432654276358e-05, "loss": 0.0248, "step": 28350 }, { "epoch": 0.6039212350614553, "grad_norm": 0.6675688028335571, "learning_rate": 1.8792200059541533e-05, "loss": 0.0255, "step": 28400 }, { "epoch": 0.6039212350614553, "eval_f1": 0.6514516124286919, "eval_loss": 0.0241817906498909, "eval_precision": 0.8232067973370214, "eval_recall": 0.5389948677151919, "eval_runtime": 231.8755, "eval_samples_per_second": 405.606, "eval_steps_per_second": 12.679, "step": 28400 }, { "epoch": 0.604984476672479, "grad_norm": 1.0646334886550903, "learning_rate": 1.8790073576319484e-05, "loss": 0.0254, "step": 28450 }, { "epoch": 0.6060477182835028, "grad_norm": 0.31649988889694214, "learning_rate": 1.8787947093097437e-05, "loss": 0.026, "step": 28500 }, { "epoch": 0.6071109598945265, "grad_norm": 0.7116311192512512, "learning_rate": 1.8785820609875388e-05, "loss": 0.0232, "step": 28550 }, { "epoch": 0.6081742015055501, "grad_norm": 0.40361514687538147, "learning_rate": 1.878369412665334e-05, "loss": 0.0194, "step": 28600 }, { "epoch": 0.6081742015055501, "eval_f1": 0.6413710567260923, "eval_loss": 0.024835892021656036, "eval_precision": 0.8407834493633273, "eval_recall": 0.5184160078668966, "eval_runtime": 231.4193, "eval_samples_per_second": 406.405, "eval_steps_per_second": 12.704, "step": 28600 }, { "epoch": 0.6092374431165738, "grad_norm": 0.29674679040908813, "learning_rate": 1.8781567643431295e-05, "loss": 0.0207, "step": 28650 }, { "epoch": 0.6103006847275975, "grad_norm": 0.877382218837738, "learning_rate": 1.877944116020925e-05, "loss": 0.027, "step": 28700 }, { "epoch": 0.6113639263386211, "grad_norm": 1.1225852966308594, "learning_rate": 1.87773146769872e-05, "loss": 0.0234, "step": 28750 }, { "epoch": 0.6124271679496449, "grad_norm": 0.3171367347240448, "learning_rate": 1.8775188193765153e-05, "loss": 0.0226, "step": 28800 }, { "epoch": 0.6124271679496449, "eval_f1": 0.6489157038168927, "eval_loss": 0.024887993931770325, "eval_precision": 0.8224266576224503, "eval_recall": 0.535862461397344, "eval_runtime": 231.7903, "eval_samples_per_second": 405.755, "eval_steps_per_second": 12.684, "step": 28800 }, { "epoch": 0.6134904095606686, "grad_norm": 0.3892594575881958, "learning_rate": 1.8773061710543107e-05, "loss": 0.0276, "step": 28850 }, { "epoch": 0.6145536511716923, "grad_norm": 0.6711094975471497, "learning_rate": 1.877093522732106e-05, "loss": 0.0261, "step": 28900 }, { "epoch": 0.6156168927827159, "grad_norm": 0.5974873304367065, "learning_rate": 1.876880874409901e-05, "loss": 0.0273, "step": 28950 }, { "epoch": 0.6166801343937396, "grad_norm": 0.6817318797111511, "learning_rate": 1.8766682260876964e-05, "loss": 0.0239, "step": 29000 }, { "epoch": 0.6166801343937396, "eval_f1": 0.6398723149186711, "eval_loss": 0.024121103808283806, "eval_precision": 0.8274861745295109, "eval_recall": 0.5216091839722159, "eval_runtime": 231.7478, "eval_samples_per_second": 405.829, "eval_steps_per_second": 12.686, "step": 29000 }, { "epoch": 0.6177433760047634, "grad_norm": 0.47577857971191406, "learning_rate": 1.8764555777654915e-05, "loss": 0.0257, "step": 29050 }, { "epoch": 0.618806617615787, "grad_norm": 0.49080386757850647, "learning_rate": 1.876242929443287e-05, "loss": 0.0202, "step": 29100 }, { "epoch": 0.6198698592268107, "grad_norm": 0.40597933530807495, "learning_rate": 1.8760302811210822e-05, "loss": 0.0257, "step": 29150 }, { "epoch": 0.6209331008378344, "grad_norm": 0.9921469688415527, "learning_rate": 1.8758176327988773e-05, "loss": 0.0198, "step": 29200 }, { "epoch": 0.6209331008378344, "eval_f1": 0.637957800258359, "eval_loss": 0.026490163058042526, "eval_precision": 0.8314236773671748, "eval_recall": 0.5175320836854933, "eval_runtime": 231.8791, "eval_samples_per_second": 405.599, "eval_steps_per_second": 12.679, "step": 29200 }, { "epoch": 0.621996342448858, "grad_norm": 0.4535175561904907, "learning_rate": 1.8756049844766726e-05, "loss": 0.0311, "step": 29250 }, { "epoch": 0.6230595840598818, "grad_norm": 0.37642785906791687, "learning_rate": 1.875392336154468e-05, "loss": 0.0274, "step": 29300 }, { "epoch": 0.6241228256709055, "grad_norm": 0.19824492931365967, "learning_rate": 1.8751796878322634e-05, "loss": 0.0237, "step": 29350 }, { "epoch": 0.6251860672819292, "grad_norm": 0.08884403109550476, "learning_rate": 1.8749670395100584e-05, "loss": 0.0244, "step": 29400 }, { "epoch": 0.6251860672819292, "eval_f1": 0.6408732966889238, "eval_loss": 0.023244598880410194, "eval_precision": 0.8437034831794417, "eval_recall": 0.5166647330824913, "eval_runtime": 231.7029, "eval_samples_per_second": 405.908, "eval_steps_per_second": 12.689, "step": 29400 }, { "epoch": 0.6262493088929528, "grad_norm": 0.41402560472488403, "learning_rate": 1.8747543911878538e-05, "loss": 0.0227, "step": 29450 }, { "epoch": 0.6273125505039765, "grad_norm": 0.2086181789636612, "learning_rate": 1.8745417428656488e-05, "loss": 0.0222, "step": 29500 }, { "epoch": 0.6283757921150002, "grad_norm": 0.3521628677845001, "learning_rate": 1.8743290945434442e-05, "loss": 0.0328, "step": 29550 }, { "epoch": 0.6294390337260239, "grad_norm": 0.4565214514732361, "learning_rate": 1.8741164462212395e-05, "loss": 0.0287, "step": 29600 }, { "epoch": 0.6294390337260239, "eval_f1": 0.6513224864609578, "eval_loss": 0.02459462732076645, "eval_precision": 0.7925782178217194, "eval_recall": 0.5528006585234846, "eval_runtime": 231.5347, "eval_samples_per_second": 406.203, "eval_steps_per_second": 12.698, "step": 29600 }, { "epoch": 0.6305022753370476, "grad_norm": 0.5984659194946289, "learning_rate": 1.8739037978990346e-05, "loss": 0.0248, "step": 29650 }, { "epoch": 0.6315655169480713, "grad_norm": 0.3425915539264679, "learning_rate": 1.87369114957683e-05, "loss": 0.0259, "step": 29700 }, { "epoch": 0.632628758559095, "grad_norm": 0.8634417653083801, "learning_rate": 1.8734785012546253e-05, "loss": 0.0268, "step": 29750 }, { "epoch": 0.6336920001701186, "grad_norm": 1.4781534671783447, "learning_rate": 1.8732658529324207e-05, "loss": 0.0261, "step": 29800 }, { "epoch": 0.6336920001701186, "eval_f1": 0.6509194444160648, "eval_loss": 0.023368002846837044, "eval_precision": 0.8349274434854078, "eval_recall": 0.5333709001110135, "eval_runtime": 231.892, "eval_samples_per_second": 405.577, "eval_steps_per_second": 12.678, "step": 29800 }, { "epoch": 0.6347552417811424, "grad_norm": 0.755605161190033, "learning_rate": 1.8730532046102157e-05, "loss": 0.027, "step": 29850 }, { "epoch": 0.6358184833921661, "grad_norm": 0.7616377472877502, "learning_rate": 1.872840556288011e-05, "loss": 0.0279, "step": 29900 }, { "epoch": 0.6368817250031897, "grad_norm": 0.38013267517089844, "learning_rate": 1.872627907965806e-05, "loss": 0.0224, "step": 29950 }, { "epoch": 0.6379449666142134, "grad_norm": 0.7567084431648254, "learning_rate": 1.8724152596436015e-05, "loss": 0.0234, "step": 30000 }, { "epoch": 0.6379449666142134, "eval_f1": 0.6495745301499488, "eval_loss": 0.02365274354815483, "eval_precision": 0.822203044224956, "eval_recall": 0.5368568761014227, "eval_runtime": 231.4877, "eval_samples_per_second": 406.285, "eval_steps_per_second": 12.7, "step": 30000 }, { "epoch": 0.6390082082252371, "grad_norm": 2.1393368244171143, "learning_rate": 1.872202611321397e-05, "loss": 0.0266, "step": 30050 }, { "epoch": 0.6400714498362607, "grad_norm": 0.49971067905426025, "learning_rate": 1.871989962999192e-05, "loss": 0.0261, "step": 30100 }, { "epoch": 0.6411346914472845, "grad_norm": 0.634192705154419, "learning_rate": 1.8717773146769873e-05, "loss": 0.0263, "step": 30150 }, { "epoch": 0.6421979330583082, "grad_norm": 0.7146007418632507, "learning_rate": 1.8715646663547827e-05, "loss": 0.0223, "step": 30200 }, { "epoch": 0.6421979330583082, "eval_f1": 0.6449515096435737, "eval_loss": 0.023578092455863953, "eval_precision": 0.8307183282541724, "eval_recall": 0.5270839893707826, "eval_runtime": 231.5266, "eval_samples_per_second": 406.217, "eval_steps_per_second": 12.698, "step": 30200 }, { "epoch": 0.6432611746693319, "grad_norm": 0.2872869372367859, "learning_rate": 1.871352018032578e-05, "loss": 0.027, "step": 30250 }, { "epoch": 0.6443244162803555, "grad_norm": 0.8617602586746216, "learning_rate": 1.8711393697103734e-05, "loss": 0.0212, "step": 30300 }, { "epoch": 0.6453876578913792, "grad_norm": 0.17351259291172028, "learning_rate": 1.8709267213881684e-05, "loss": 0.0232, "step": 30350 }, { "epoch": 0.646450899502403, "grad_norm": 0.5695050358772278, "learning_rate": 1.8707140730659638e-05, "loss": 0.0232, "step": 30400 }, { "epoch": 0.646450899502403, "eval_f1": 0.6501715431092207, "eval_loss": 0.0239565372467041, "eval_precision": 0.8127952658013151, "eval_recall": 0.5417737043604786, "eval_runtime": 231.69, "eval_samples_per_second": 405.93, "eval_steps_per_second": 12.689, "step": 30400 }, { "epoch": 0.6475141411134266, "grad_norm": 0.7928967475891113, "learning_rate": 1.870501424743759e-05, "loss": 0.0286, "step": 30450 }, { "epoch": 0.6485773827244503, "grad_norm": 0.17823879420757294, "learning_rate": 1.8702887764215542e-05, "loss": 0.0279, "step": 30500 }, { "epoch": 0.649640624335474, "grad_norm": 0.4964252710342407, "learning_rate": 1.8700761280993492e-05, "loss": 0.0236, "step": 30550 }, { "epoch": 0.6507038659464977, "grad_norm": 0.34295615553855896, "learning_rate": 1.8698634797771446e-05, "loss": 0.0215, "step": 30600 }, { "epoch": 0.6507038659464977, "eval_f1": 0.6501578932143386, "eval_loss": 0.02340841107070446, "eval_precision": 0.8047788303196645, "eval_recall": 0.5453756953996969, "eval_runtime": 231.5786, "eval_samples_per_second": 406.126, "eval_steps_per_second": 12.695, "step": 30600 }, { "epoch": 0.6517671075575213, "grad_norm": 0.2491130381822586, "learning_rate": 1.86965083145494e-05, "loss": 0.0271, "step": 30650 }, { "epoch": 0.6528303491685451, "grad_norm": 0.10373938083648682, "learning_rate": 1.8694381831327354e-05, "loss": 0.022, "step": 30700 }, { "epoch": 0.6538935907795688, "grad_norm": 0.546635627746582, "learning_rate": 1.8692255348105307e-05, "loss": 0.0267, "step": 30750 }, { "epoch": 0.6549568323905924, "grad_norm": 0.21878832578659058, "learning_rate": 1.8690128864883258e-05, "loss": 0.0283, "step": 30800 }, { "epoch": 0.6549568323905924, "eval_f1": 0.6445805377766247, "eval_loss": 0.023525765165686607, "eval_precision": 0.8127900719714122, "eval_recall": 0.5340559413516011, "eval_runtime": 231.4558, "eval_samples_per_second": 406.341, "eval_steps_per_second": 12.702, "step": 30800 }, { "epoch": 0.6560200740016161, "grad_norm": 0.2537536025047302, "learning_rate": 1.868800238166121e-05, "loss": 0.0231, "step": 30850 }, { "epoch": 0.6570833156126398, "grad_norm": 0.16729967296123505, "learning_rate": 1.8685875898439162e-05, "loss": 0.0281, "step": 30900 }, { "epoch": 0.6581465572236636, "grad_norm": 0.4273781478404999, "learning_rate": 1.8683749415217115e-05, "loss": 0.0262, "step": 30950 }, { "epoch": 0.6592097988346872, "grad_norm": 0.6649103164672852, "learning_rate": 1.8681622931995066e-05, "loss": 0.0295, "step": 31000 }, { "epoch": 0.6592097988346872, "eval_f1": 0.6454559378081398, "eval_loss": 0.023893551900982857, "eval_precision": 0.8165219816910124, "eval_recall": 0.5336526509438357, "eval_runtime": 231.5701, "eval_samples_per_second": 406.14, "eval_steps_per_second": 12.696, "step": 31000 }, { "epoch": 0.6602730404457109, "grad_norm": 0.5109022259712219, "learning_rate": 1.867949644877302e-05, "loss": 0.0258, "step": 31050 }, { "epoch": 0.6613362820567346, "grad_norm": 0.3962841331958771, "learning_rate": 1.8677369965550973e-05, "loss": 0.0239, "step": 31100 }, { "epoch": 0.6623995236677582, "grad_norm": 0.4458056688308716, "learning_rate": 1.8675243482328927e-05, "loss": 0.0256, "step": 31150 }, { "epoch": 0.6634627652787819, "grad_norm": 0.1552438586950302, "learning_rate": 1.867311699910688e-05, "loss": 0.0284, "step": 31200 }, { "epoch": 0.6634627652787819, "eval_f1": 0.6360022393766092, "eval_loss": 0.023576226085424423, "eval_precision": 0.8435251798560381, "eval_recall": 0.5104275430774643, "eval_runtime": 231.5287, "eval_samples_per_second": 406.213, "eval_steps_per_second": 12.698, "step": 31200 }, { "epoch": 0.6645260068898057, "grad_norm": 0.44245395064353943, "learning_rate": 1.867099051588483e-05, "loss": 0.0215, "step": 31250 }, { "epoch": 0.6655892485008293, "grad_norm": 0.49932190775871277, "learning_rate": 1.8668864032662785e-05, "loss": 0.0243, "step": 31300 }, { "epoch": 0.666652490111853, "grad_norm": 0.2076929807662964, "learning_rate": 1.8666737549440735e-05, "loss": 0.0246, "step": 31350 }, { "epoch": 0.6677157317228767, "grad_norm": 0.3494974970817566, "learning_rate": 1.866461106621869e-05, "loss": 0.0406, "step": 31400 }, { "epoch": 0.6677157317228767, "eval_f1": 0.6441021531135347, "eval_loss": 0.023667799308896065, "eval_precision": 0.8206380431063274, "eval_recall": 0.5300727580091524, "eval_runtime": 231.4976, "eval_samples_per_second": 406.268, "eval_steps_per_second": 12.7, "step": 31400 }, { "epoch": 0.6687789733339004, "grad_norm": 0.2589725852012634, "learning_rate": 1.8662484582996642e-05, "loss": 0.0273, "step": 31450 }, { "epoch": 0.6698422149449241, "grad_norm": 0.5442560911178589, "learning_rate": 1.8660358099774593e-05, "loss": 0.0269, "step": 31500 }, { "epoch": 0.6709054565559478, "grad_norm": 0.27964067459106445, "learning_rate": 1.8658231616552547e-05, "loss": 0.0224, "step": 31550 }, { "epoch": 0.6719686981669715, "grad_norm": 0.34818556904792786, "learning_rate": 1.86561051333305e-05, "loss": 0.0244, "step": 31600 }, { "epoch": 0.6719686981669715, "eval_f1": 0.6507113279354735, "eval_loss": 0.024029288440942764, "eval_precision": 0.8185510547603807, "eval_recall": 0.5399892824192706, "eval_runtime": 231.6338, "eval_samples_per_second": 406.029, "eval_steps_per_second": 12.692, "step": 31600 }, { "epoch": 0.6730319397779951, "grad_norm": 0.8163536190986633, "learning_rate": 1.8653978650108454e-05, "loss": 0.024, "step": 31650 }, { "epoch": 0.6740951813890188, "grad_norm": 0.527062177658081, "learning_rate": 1.8651852166886404e-05, "loss": 0.0225, "step": 31700 }, { "epoch": 0.6751584230000426, "grad_norm": 0.24055016040802002, "learning_rate": 1.8649725683664358e-05, "loss": 0.0262, "step": 31750 }, { "epoch": 0.6762216646110663, "grad_norm": 0.8475775122642517, "learning_rate": 1.8647599200442312e-05, "loss": 0.0266, "step": 31800 }, { "epoch": 0.6762216646110663, "eval_f1": 0.6412738095683288, "eval_loss": 0.02395857684314251, "eval_precision": 0.8340956340955603, "eval_recall": 0.5208633729441569, "eval_runtime": 231.439, "eval_samples_per_second": 406.371, "eval_steps_per_second": 12.703, "step": 31800 }, { "epoch": 0.6772849062220899, "grad_norm": 0.6620641350746155, "learning_rate": 1.8645472717220262e-05, "loss": 0.0258, "step": 31850 }, { "epoch": 0.6783481478331136, "grad_norm": 0.4231718182563782, "learning_rate": 1.8643346233998216e-05, "loss": 0.028, "step": 31900 }, { "epoch": 0.6794113894441373, "grad_norm": 1.0591089725494385, "learning_rate": 1.8641219750776166e-05, "loss": 0.0236, "step": 31950 }, { "epoch": 0.6804746310551609, "grad_norm": 1.9183076620101929, "learning_rate": 1.863909326755412e-05, "loss": 0.0244, "step": 32000 }, { "epoch": 0.6804746310551609, "eval_f1": 0.6438024133068821, "eval_loss": 0.023557132109999657, "eval_precision": 0.8385454996851563, "eval_recall": 0.5224654855229504, "eval_runtime": 231.4307, "eval_samples_per_second": 406.385, "eval_steps_per_second": 12.704, "step": 32000 }, { "epoch": 0.6815378726661847, "grad_norm": 0.41052934527397156, "learning_rate": 1.8636966784332074e-05, "loss": 0.0264, "step": 32050 }, { "epoch": 0.6826011142772084, "grad_norm": 1.5476129055023193, "learning_rate": 1.8634840301110027e-05, "loss": 0.028, "step": 32100 }, { "epoch": 0.683664355888232, "grad_norm": 0.736318826675415, "learning_rate": 1.8632713817887978e-05, "loss": 0.0249, "step": 32150 }, { "epoch": 0.6847275974992557, "grad_norm": 0.25771278142929077, "learning_rate": 1.863058733466593e-05, "loss": 0.0238, "step": 32200 }, { "epoch": 0.6847275974992557, "eval_f1": 0.6534107315651826, "eval_loss": 0.023887909948825836, "eval_precision": 0.8080714349669273, "eval_recall": 0.5484418074039396, "eval_runtime": 231.5464, "eval_samples_per_second": 406.182, "eval_steps_per_second": 12.697, "step": 32200 }, { "epoch": 0.6857908391102794, "grad_norm": 0.31586137413978577, "learning_rate": 1.8628460851443885e-05, "loss": 0.0295, "step": 32250 }, { "epoch": 0.6868540807213032, "grad_norm": 0.7301635146141052, "learning_rate": 1.8626334368221835e-05, "loss": 0.0207, "step": 32300 }, { "epoch": 0.6879173223323268, "grad_norm": 0.5088218450546265, "learning_rate": 1.862420788499979e-05, "loss": 0.0227, "step": 32350 }, { "epoch": 0.6889805639433505, "grad_norm": 0.4388934075832367, "learning_rate": 1.862208140177774e-05, "loss": 0.0252, "step": 32400 }, { "epoch": 0.6889805639433505, "eval_f1": 0.6517880907908908, "eval_loss": 0.02413228712975979, "eval_precision": 0.8066948130276785, "eval_recall": 0.5467899740899422, "eval_runtime": 231.4902, "eval_samples_per_second": 406.281, "eval_steps_per_second": 12.7, "step": 32400 }, { "epoch": 0.6900438055543742, "grad_norm": 0.8639470338821411, "learning_rate": 1.8619954918555693e-05, "loss": 0.0234, "step": 32450 }, { "epoch": 0.6911070471653978, "grad_norm": 0.5659166574478149, "learning_rate": 1.8617828435333647e-05, "loss": 0.0257, "step": 32500 }, { "epoch": 0.6921702887764215, "grad_norm": 0.2059001624584198, "learning_rate": 1.86157019521116e-05, "loss": 0.0288, "step": 32550 }, { "epoch": 0.6932335303874453, "grad_norm": 0.5089161992073059, "learning_rate": 1.861357546888955e-05, "loss": 0.0268, "step": 32600 }, { "epoch": 0.6932335303874453, "eval_f1": 0.6450886104776588, "eval_loss": 0.022938529029488564, "eval_precision": 0.8441053769395624, "eval_recall": 0.5220124743799812, "eval_runtime": 231.6106, "eval_samples_per_second": 406.069, "eval_steps_per_second": 12.694, "step": 32600 }, { "epoch": 0.694296771998469, "grad_norm": 0.46953845024108887, "learning_rate": 1.8611448985667505e-05, "loss": 0.0237, "step": 32650 }, { "epoch": 0.6953600136094926, "grad_norm": 0.7522330284118652, "learning_rate": 1.860932250244546e-05, "loss": 0.0266, "step": 32700 }, { "epoch": 0.6964232552205163, "grad_norm": 0.3390817940235138, "learning_rate": 1.860719601922341e-05, "loss": 0.0203, "step": 32750 }, { "epoch": 0.69748649683154, "grad_norm": 0.7516964673995972, "learning_rate": 1.8605069536001362e-05, "loss": 0.0275, "step": 32800 }, { "epoch": 0.69748649683154, "eval_f1": 0.6560737493349249, "eval_loss": 0.023623893037438393, "eval_precision": 0.8154112596750621, "eval_recall": 0.5488285242333035, "eval_runtime": 231.6212, "eval_samples_per_second": 406.051, "eval_steps_per_second": 12.693, "step": 32800 }, { "epoch": 0.6985497384425637, "grad_norm": 0.8128226399421692, "learning_rate": 1.8602943052779313e-05, "loss": 0.0216, "step": 32850 }, { "epoch": 0.6996129800535874, "grad_norm": 0.5307417511940002, "learning_rate": 1.8600816569557267e-05, "loss": 0.0241, "step": 32900 }, { "epoch": 0.7006762216646111, "grad_norm": 1.406564474105835, "learning_rate": 1.859869008633522e-05, "loss": 0.031, "step": 32950 }, { "epoch": 0.7017394632756347, "grad_norm": 0.4969779849052429, "learning_rate": 1.8596563603113174e-05, "loss": 0.0239, "step": 33000 }, { "epoch": 0.7017394632756347, "eval_f1": 0.6494263352777426, "eval_loss": 0.02361537329852581, "eval_precision": 0.8175826387374162, "eval_recall": 0.5386412980426306, "eval_runtime": 231.5228, "eval_samples_per_second": 406.224, "eval_steps_per_second": 12.699, "step": 33000 }, { "epoch": 0.7028027048866584, "grad_norm": 0.5012500882148743, "learning_rate": 1.8594437119891128e-05, "loss": 0.0295, "step": 33050 }, { "epoch": 0.7038659464976821, "grad_norm": 0.13700073957443237, "learning_rate": 1.8592310636669078e-05, "loss": 0.0216, "step": 33100 }, { "epoch": 0.7049291881087059, "grad_norm": 1.0114623308181763, "learning_rate": 1.8590184153447032e-05, "loss": 0.0197, "step": 33150 }, { "epoch": 0.7059924297197295, "grad_norm": 0.3119146227836609, "learning_rate": 1.8588057670224985e-05, "loss": 0.0237, "step": 33200 }, { "epoch": 0.7059924297197295, "eval_f1": 0.6404488236657208, "eval_loss": 0.02364823967218399, "eval_precision": 0.8295709176544293, "eval_recall": 0.5215484141847444, "eval_runtime": 231.6191, "eval_samples_per_second": 406.055, "eval_steps_per_second": 12.693, "step": 33200 }, { "epoch": 0.7070556713307532, "grad_norm": 0.6313928365707397, "learning_rate": 1.8585931187002936e-05, "loss": 0.0252, "step": 33250 }, { "epoch": 0.7081189129417769, "grad_norm": 0.3690483272075653, "learning_rate": 1.8583804703780886e-05, "loss": 0.0227, "step": 33300 }, { "epoch": 0.7091821545528005, "grad_norm": 1.4363881349563599, "learning_rate": 1.858167822055884e-05, "loss": 0.0283, "step": 33350 }, { "epoch": 0.7102453961638243, "grad_norm": 0.3056366741657257, "learning_rate": 1.8579551737336794e-05, "loss": 0.0256, "step": 33400 }, { "epoch": 0.7102453961638243, "eval_f1": 0.6510264654048161, "eval_loss": 0.023541219532489777, "eval_precision": 0.8059127116777844, "eval_recall": 0.5460773102186858, "eval_runtime": 231.787, "eval_samples_per_second": 405.76, "eval_steps_per_second": 12.684, "step": 33400 }, { "epoch": 0.711308637774848, "grad_norm": 0.55170077085495, "learning_rate": 1.8577425254114747e-05, "loss": 0.0247, "step": 33450 }, { "epoch": 0.7123718793858717, "grad_norm": 0.668157696723938, "learning_rate": 1.85752987708927e-05, "loss": 0.0292, "step": 33500 }, { "epoch": 0.7134351209968953, "grad_norm": 0.9204743504524231, "learning_rate": 1.857317228767065e-05, "loss": 0.0248, "step": 33550 }, { "epoch": 0.714498362607919, "grad_norm": 1.3219493627548218, "learning_rate": 1.8571045804448605e-05, "loss": 0.0269, "step": 33600 }, { "epoch": 0.714498362607919, "eval_f1": 0.6572848482778452, "eval_loss": 0.025472844019532204, "eval_precision": 0.7832030503785631, "eval_recall": 0.5662473551330821, "eval_runtime": 231.5125, "eval_samples_per_second": 406.242, "eval_steps_per_second": 12.699, "step": 33600 }, { "epoch": 0.7155616042189427, "grad_norm": 0.4387553036212921, "learning_rate": 1.856891932122656e-05, "loss": 0.0279, "step": 33650 }, { "epoch": 0.7166248458299664, "grad_norm": 0.7638331651687622, "learning_rate": 1.856679283800451e-05, "loss": 0.0312, "step": 33700 }, { "epoch": 0.7176880874409901, "grad_norm": 0.31249016523361206, "learning_rate": 1.8564666354782463e-05, "loss": 0.0248, "step": 33750 }, { "epoch": 0.7187513290520138, "grad_norm": 0.38072675466537476, "learning_rate": 1.8562539871560413e-05, "loss": 0.024, "step": 33800 }, { "epoch": 0.7187513290520138, "eval_f1": 0.6514273379942868, "eval_loss": 0.022883595898747444, "eval_precision": 0.8311814128943046, "eval_recall": 0.5355972841429231, "eval_runtime": 231.419, "eval_samples_per_second": 406.406, "eval_steps_per_second": 12.704, "step": 33800 }, { "epoch": 0.7198145706630374, "grad_norm": 1.3494670391082764, "learning_rate": 1.8560413388338367e-05, "loss": 0.0261, "step": 33850 }, { "epoch": 0.7208778122740611, "grad_norm": 0.3457150459289551, "learning_rate": 1.855828690511632e-05, "loss": 0.0222, "step": 33900 }, { "epoch": 0.7219410538850849, "grad_norm": 0.28656652569770813, "learning_rate": 1.8556160421894274e-05, "loss": 0.0264, "step": 33950 }, { "epoch": 0.7230042954961086, "grad_norm": 0.6843706965446472, "learning_rate": 1.8554033938672225e-05, "loss": 0.0221, "step": 34000 }, { "epoch": 0.7230042954961086, "eval_f1": 0.6554697731517629, "eval_loss": 0.023647084832191467, "eval_precision": 0.813087017704551, "eval_recall": 0.5490384562263868, "eval_runtime": 231.6119, "eval_samples_per_second": 406.067, "eval_steps_per_second": 12.694, "step": 34000 }, { "epoch": 0.7240675371071322, "grad_norm": 1.3421539068222046, "learning_rate": 1.855190745545018e-05, "loss": 0.0262, "step": 34050 }, { "epoch": 0.7251307787181559, "grad_norm": 0.4644365906715393, "learning_rate": 1.8549780972228132e-05, "loss": 0.0221, "step": 34100 }, { "epoch": 0.7261940203291796, "grad_norm": 0.4746440649032593, "learning_rate": 1.8547654489006082e-05, "loss": 0.0258, "step": 34150 }, { "epoch": 0.7272572619402032, "grad_norm": 0.15176118910312653, "learning_rate": 1.8545528005784036e-05, "loss": 0.0262, "step": 34200 }, { "epoch": 0.7272572619402032, "eval_f1": 0.6544590931892912, "eval_loss": 0.023032473400235176, "eval_precision": 0.8258040621869123, "eval_recall": 0.5420002099319631, "eval_runtime": 231.4688, "eval_samples_per_second": 406.318, "eval_steps_per_second": 12.701, "step": 34200 }, { "epoch": 0.728320503551227, "grad_norm": 1.1328974962234497, "learning_rate": 1.8543401522561986e-05, "loss": 0.0244, "step": 34250 }, { "epoch": 0.7293837451622507, "grad_norm": 1.1273788213729858, "learning_rate": 1.854127503933994e-05, "loss": 0.0254, "step": 34300 }, { "epoch": 0.7304469867732744, "grad_norm": 0.9056223630905151, "learning_rate": 1.8539148556117894e-05, "loss": 0.0241, "step": 34350 }, { "epoch": 0.731510228384298, "grad_norm": 0.6109226942062378, "learning_rate": 1.8537022072895848e-05, "loss": 0.0244, "step": 34400 }, { "epoch": 0.731510228384298, "eval_f1": 0.6501795435076703, "eval_loss": 0.023100225254893303, "eval_precision": 0.8282133055172473, "eval_recall": 0.5351442729999538, "eval_runtime": 231.6155, "eval_samples_per_second": 406.061, "eval_steps_per_second": 12.693, "step": 34400 }, { "epoch": 0.7325734699953217, "grad_norm": 1.0231901407241821, "learning_rate": 1.8534895589673798e-05, "loss": 0.0219, "step": 34450 }, { "epoch": 0.7336367116063455, "grad_norm": 0.2983200252056122, "learning_rate": 1.853276910645175e-05, "loss": 0.0259, "step": 34500 }, { "epoch": 0.7346999532173691, "grad_norm": 0.7307019829750061, "learning_rate": 1.8530642623229705e-05, "loss": 0.0228, "step": 34550 }, { "epoch": 0.7357631948283928, "grad_norm": 1.2080340385437012, "learning_rate": 1.8528516140007656e-05, "loss": 0.0232, "step": 34600 }, { "epoch": 0.7357631948283928, "eval_f1": 0.6522354443928812, "eval_loss": 0.023165011778473854, "eval_precision": 0.8330556652868426, "eval_recall": 0.535912182132548, "eval_runtime": 231.2798, "eval_samples_per_second": 406.65, "eval_steps_per_second": 12.712, "step": 34600 }, { "epoch": 0.7368264364394165, "grad_norm": 0.28783348202705383, "learning_rate": 1.852638965678561e-05, "loss": 0.0235, "step": 34650 }, { "epoch": 0.7378896780504401, "grad_norm": 0.7509434819221497, "learning_rate": 1.852426317356356e-05, "loss": 0.0257, "step": 34700 }, { "epoch": 0.7389529196614639, "grad_norm": 0.40708792209625244, "learning_rate": 1.8522136690341513e-05, "loss": 0.024, "step": 34750 }, { "epoch": 0.7400161612724876, "grad_norm": 0.5156585574150085, "learning_rate": 1.8520010207119467e-05, "loss": 0.0239, "step": 34800 }, { "epoch": 0.7400161612724876, "eval_f1": 0.6488058667431724, "eval_loss": 0.023327326402068138, "eval_precision": 0.839612543211536, "eval_recall": 0.528664003845041, "eval_runtime": 231.2248, "eval_samples_per_second": 406.747, "eval_steps_per_second": 12.715, "step": 34800 }, { "epoch": 0.7410794028835113, "grad_norm": 0.25972506403923035, "learning_rate": 1.851788372389742e-05, "loss": 0.0241, "step": 34850 }, { "epoch": 0.7421426444945349, "grad_norm": 0.217527836561203, "learning_rate": 1.851575724067537e-05, "loss": 0.0221, "step": 34900 }, { "epoch": 0.7432058861055586, "grad_norm": 0.4464794397354126, "learning_rate": 1.8513630757453325e-05, "loss": 0.0241, "step": 34950 }, { "epoch": 0.7442691277165823, "grad_norm": 0.18067830801010132, "learning_rate": 1.851150427423128e-05, "loss": 0.0209, "step": 35000 }, { "epoch": 0.7442691277165823, "eval_f1": 0.6489137334096804, "eval_loss": 0.022680915892124176, "eval_precision": 0.8507231312388349, "eval_recall": 0.5244929866140442, "eval_runtime": 231.3035, "eval_samples_per_second": 406.609, "eval_steps_per_second": 12.711, "step": 35000 }, { "epoch": 0.745332369327606, "grad_norm": 0.2994401752948761, "learning_rate": 1.8509377791009232e-05, "loss": 0.0241, "step": 35050 }, { "epoch": 0.7463956109386297, "grad_norm": 0.317514032125473, "learning_rate": 1.8507251307787183e-05, "loss": 0.0247, "step": 35100 }, { "epoch": 0.7474588525496534, "grad_norm": 0.3717614710330963, "learning_rate": 1.8505124824565136e-05, "loss": 0.0242, "step": 35150 }, { "epoch": 0.748522094160677, "grad_norm": 0.20226731896400452, "learning_rate": 1.8502998341343087e-05, "loss": 0.0282, "step": 35200 }, { "epoch": 0.748522094160677, "eval_f1": 0.6368222513183108, "eval_loss": 0.02279890514910221, "eval_precision": 0.8682305131310761, "eval_recall": 0.5028092215389947, "eval_runtime": 231.3534, "eval_samples_per_second": 406.521, "eval_steps_per_second": 12.708, "step": 35200 }, { "epoch": 0.7495853357717007, "grad_norm": 0.4606381058692932, "learning_rate": 1.850087185812104e-05, "loss": 0.0259, "step": 35250 }, { "epoch": 0.7506485773827245, "grad_norm": 0.6153512597084045, "learning_rate": 1.8498745374898994e-05, "loss": 0.0257, "step": 35300 }, { "epoch": 0.7517118189937482, "grad_norm": 0.21296440064907074, "learning_rate": 1.8496618891676945e-05, "loss": 0.0237, "step": 35350 }, { "epoch": 0.7527750606047718, "grad_norm": 0.26174643635749817, "learning_rate": 1.84944924084549e-05, "loss": 0.0245, "step": 35400 }, { "epoch": 0.7527750606047718, "eval_f1": 0.6490645972757959, "eval_loss": 0.02372586913406849, "eval_precision": 0.8341082581539874, "eval_recall": 0.5312163349188429, "eval_runtime": 231.3117, "eval_samples_per_second": 406.594, "eval_steps_per_second": 12.71, "step": 35400 }, { "epoch": 0.7538383022157955, "grad_norm": 0.8594101071357727, "learning_rate": 1.8492365925232852e-05, "loss": 0.023, "step": 35450 }, { "epoch": 0.7549015438268192, "grad_norm": 0.8364015817642212, "learning_rate": 1.8490239442010806e-05, "loss": 0.0233, "step": 35500 }, { "epoch": 0.7559647854378428, "grad_norm": 0.23417985439300537, "learning_rate": 1.8488112958788756e-05, "loss": 0.0237, "step": 35550 }, { "epoch": 0.7570280270488666, "grad_norm": 0.34493276476860046, "learning_rate": 1.848598647556671e-05, "loss": 0.0218, "step": 35600 }, { "epoch": 0.7570280270488666, "eval_f1": 0.6472780217866769, "eval_loss": 0.023567402735352516, "eval_precision": 0.8136904712111279, "eval_recall": 0.5373761815579972, "eval_runtime": 231.2469, "eval_samples_per_second": 406.708, "eval_steps_per_second": 12.714, "step": 35600 }, { "epoch": 0.7580912686598903, "grad_norm": 0.7024061679840088, "learning_rate": 1.848385999234466e-05, "loss": 0.0217, "step": 35650 }, { "epoch": 0.759154510270914, "grad_norm": 0.20508909225463867, "learning_rate": 1.8481733509122614e-05, "loss": 0.0231, "step": 35700 }, { "epoch": 0.7602177518819376, "grad_norm": 0.22717680037021637, "learning_rate": 1.8479607025900568e-05, "loss": 0.0176, "step": 35750 }, { "epoch": 0.7612809934929613, "grad_norm": 0.5041443109512329, "learning_rate": 1.847748054267852e-05, "loss": 0.0278, "step": 35800 }, { "epoch": 0.7612809934929613, "eval_f1": 0.6490031826205324, "eval_loss": 0.022770071402192116, "eval_precision": 0.8488435410584568, "eval_recall": 0.5253271900602435, "eval_runtime": 231.238, "eval_samples_per_second": 406.724, "eval_steps_per_second": 12.714, "step": 35800 }, { "epoch": 0.7623442351039851, "grad_norm": 0.42551204562187195, "learning_rate": 1.847535405945647e-05, "loss": 0.0218, "step": 35850 }, { "epoch": 0.7634074767150087, "grad_norm": 0.30759137868881226, "learning_rate": 1.8473227576234425e-05, "loss": 0.0209, "step": 35900 }, { "epoch": 0.7644707183260324, "grad_norm": 0.6456029415130615, "learning_rate": 1.847110109301238e-05, "loss": 0.0271, "step": 35950 }, { "epoch": 0.7655339599370561, "grad_norm": 0.6302129030227661, "learning_rate": 1.846897460979033e-05, "loss": 0.0278, "step": 36000 }, { "epoch": 0.7655339599370561, "eval_f1": 0.6403004385005661, "eval_loss": 0.02303897775709629, "eval_precision": 0.8599144956827902, "eval_recall": 0.5100408262481003, "eval_runtime": 231.3877, "eval_samples_per_second": 406.461, "eval_steps_per_second": 12.706, "step": 36000 }, { "epoch": 0.7665972015480798, "grad_norm": 0.3156275153160095, "learning_rate": 1.8466848126568283e-05, "loss": 0.0269, "step": 36050 }, { "epoch": 0.7676604431591034, "grad_norm": 0.6801632642745972, "learning_rate": 1.8464721643346233e-05, "loss": 0.0262, "step": 36100 }, { "epoch": 0.7687236847701272, "grad_norm": 0.2840379476547241, "learning_rate": 1.8462595160124187e-05, "loss": 0.0227, "step": 36150 }, { "epoch": 0.7697869263811509, "grad_norm": 0.9347845911979675, "learning_rate": 1.846046867690214e-05, "loss": 0.0184, "step": 36200 }, { "epoch": 0.7697869263811509, "eval_f1": 0.6433724212936206, "eval_loss": 0.023987069725990295, "eval_precision": 0.840155285863035, "eval_recall": 0.5212777124041897, "eval_runtime": 231.3521, "eval_samples_per_second": 406.523, "eval_steps_per_second": 12.708, "step": 36200 }, { "epoch": 0.7708501679921745, "grad_norm": 0.18949593603610992, "learning_rate": 1.8458342193680095e-05, "loss": 0.025, "step": 36250 }, { "epoch": 0.7719134096031982, "grad_norm": 0.7767147421836853, "learning_rate": 1.8456215710458045e-05, "loss": 0.0224, "step": 36300 }, { "epoch": 0.7729766512142219, "grad_norm": 0.2699045240879059, "learning_rate": 1.8454089227236e-05, "loss": 0.0267, "step": 36350 }, { "epoch": 0.7740398928252457, "grad_norm": 0.42633891105651855, "learning_rate": 1.8451962744013952e-05, "loss": 0.0234, "step": 36400 }, { "epoch": 0.7740398928252457, "eval_f1": 0.6548170812627737, "eval_loss": 0.023464689031243324, "eval_precision": 0.7994329133280661, "eval_recall": 0.5545077370988197, "eval_runtime": 231.6702, "eval_samples_per_second": 405.965, "eval_steps_per_second": 12.69, "step": 36400 }, { "epoch": 0.7751031344362693, "grad_norm": 0.7378496527671814, "learning_rate": 1.8449836260791906e-05, "loss": 0.026, "step": 36450 }, { "epoch": 0.776166376047293, "grad_norm": 0.49219954013824463, "learning_rate": 1.8447709777569856e-05, "loss": 0.0284, "step": 36500 }, { "epoch": 0.7772296176583167, "grad_norm": 0.16968746483325958, "learning_rate": 1.844558329434781e-05, "loss": 0.0263, "step": 36550 }, { "epoch": 0.7782928592693403, "grad_norm": 0.6358396410942078, "learning_rate": 1.844345681112576e-05, "loss": 0.0222, "step": 36600 }, { "epoch": 0.7782928592693403, "eval_f1": 0.6381301817303837, "eval_loss": 0.023497330024838448, "eval_precision": 0.8600331581756578, "eval_recall": 0.5072509405505462, "eval_runtime": 231.8173, "eval_samples_per_second": 405.707, "eval_steps_per_second": 12.682, "step": 36600 }, { "epoch": 0.779356100880364, "grad_norm": 0.1740126758813858, "learning_rate": 1.8441330327903714e-05, "loss": 0.0281, "step": 36650 }, { "epoch": 0.7804193424913878, "grad_norm": 0.5439678430557251, "learning_rate": 1.8439203844681668e-05, "loss": 0.0234, "step": 36700 }, { "epoch": 0.7814825841024114, "grad_norm": 0.8563793897628784, "learning_rate": 1.8437077361459618e-05, "loss": 0.0263, "step": 36750 }, { "epoch": 0.7825458257134351, "grad_norm": 1.8942272663116455, "learning_rate": 1.8434950878237572e-05, "loss": 0.0269, "step": 36800 }, { "epoch": 0.7825458257134351, "eval_f1": 0.6456023261664445, "eval_loss": 0.022469626739621162, "eval_precision": 0.8521498070336784, "eval_recall": 0.5196479771947273, "eval_runtime": 231.4959, "eval_samples_per_second": 406.271, "eval_steps_per_second": 12.7, "step": 36800 }, { "epoch": 0.7836090673244588, "grad_norm": 0.9551807045936584, "learning_rate": 1.8432824395015526e-05, "loss": 0.024, "step": 36850 }, { "epoch": 0.7846723089354825, "grad_norm": 0.22184331715106964, "learning_rate": 1.843069791179348e-05, "loss": 0.0206, "step": 36900 }, { "epoch": 0.7857355505465062, "grad_norm": 0.2649756371974945, "learning_rate": 1.842857142857143e-05, "loss": 0.0257, "step": 36950 }, { "epoch": 0.7867987921575299, "grad_norm": 0.770045280456543, "learning_rate": 1.8426444945349383e-05, "loss": 0.0239, "step": 37000 }, { "epoch": 0.7867987921575299, "eval_f1": 0.6467173066948936, "eval_loss": 0.02295295149087906, "eval_precision": 0.8230127023150291, "eval_recall": 0.5326250890829545, "eval_runtime": 231.5406, "eval_samples_per_second": 406.192, "eval_steps_per_second": 12.698, "step": 37000 }, { "epoch": 0.7878620337685536, "grad_norm": 0.22904783487319946, "learning_rate": 1.8424318462127334e-05, "loss": 0.0287, "step": 37050 }, { "epoch": 0.7889252753795772, "grad_norm": 0.32247254252433777, "learning_rate": 1.8422191978905288e-05, "loss": 0.0264, "step": 37100 }, { "epoch": 0.7899885169906009, "grad_norm": 0.3963124454021454, "learning_rate": 1.842006549568324e-05, "loss": 0.0226, "step": 37150 }, { "epoch": 0.7910517586016247, "grad_norm": 0.4360211193561554, "learning_rate": 1.841793901246119e-05, "loss": 0.0246, "step": 37200 }, { "epoch": 0.7910517586016247, "eval_f1": 0.6543717972090214, "eval_loss": 0.023203890770673752, "eval_precision": 0.8176924159978121, "eval_recall": 0.5454309406610346, "eval_runtime": 231.3256, "eval_samples_per_second": 406.57, "eval_steps_per_second": 12.709, "step": 37200 }, { "epoch": 0.7921150002126484, "grad_norm": 0.25340187549591064, "learning_rate": 1.8415812529239145e-05, "loss": 0.0227, "step": 37250 }, { "epoch": 0.793178241823672, "grad_norm": 0.3310944139957428, "learning_rate": 1.84136860460171e-05, "loss": 0.0273, "step": 37300 }, { "epoch": 0.7942414834346957, "grad_norm": 1.9428234100341797, "learning_rate": 1.8411559562795053e-05, "loss": 0.0244, "step": 37350 }, { "epoch": 0.7953047250457194, "grad_norm": 0.4868288040161133, "learning_rate": 1.8409433079573003e-05, "loss": 0.0216, "step": 37400 }, { "epoch": 0.7953047250457194, "eval_f1": 0.6452531051248906, "eval_loss": 0.022808995097875595, "eval_precision": 0.8344580974249475, "eval_recall": 0.525990133196296, "eval_runtime": 231.2283, "eval_samples_per_second": 406.741, "eval_steps_per_second": 12.715, "step": 37400 }, { "epoch": 0.796367966656743, "grad_norm": 2.4055166244506836, "learning_rate": 1.8407306596350957e-05, "loss": 0.0287, "step": 37450 }, { "epoch": 0.7974312082677668, "grad_norm": 0.2339506447315216, "learning_rate": 1.8405180113128907e-05, "loss": 0.026, "step": 37500 }, { "epoch": 0.7984944498787905, "grad_norm": 0.36887574195861816, "learning_rate": 1.840305362990686e-05, "loss": 0.0299, "step": 37550 }, { "epoch": 0.7995576914898141, "grad_norm": 0.222794309258461, "learning_rate": 1.8400927146684815e-05, "loss": 0.0241, "step": 37600 }, { "epoch": 0.7995576914898141, "eval_f1": 0.6499961435456011, "eval_loss": 0.022446198388934135, "eval_precision": 0.825408302143516, "eval_recall": 0.5360723933904273, "eval_runtime": 231.2886, "eval_samples_per_second": 406.635, "eval_steps_per_second": 12.711, "step": 37600 }, { "epoch": 0.8006209331008378, "grad_norm": 1.2050421237945557, "learning_rate": 1.8398800663462765e-05, "loss": 0.0241, "step": 37650 }, { "epoch": 0.8016841747118615, "grad_norm": 1.603754997253418, "learning_rate": 1.839667418024072e-05, "loss": 0.0268, "step": 37700 }, { "epoch": 0.8027474163228853, "grad_norm": 0.3088008165359497, "learning_rate": 1.8394547697018672e-05, "loss": 0.0243, "step": 37750 }, { "epoch": 0.8038106579339089, "grad_norm": 0.28268033266067505, "learning_rate": 1.8392421213796626e-05, "loss": 0.0237, "step": 37800 }, { "epoch": 0.8038106579339089, "eval_f1": 0.6426835580425015, "eval_loss": 0.023136937990784645, "eval_precision": 0.8388824628524923, "eval_recall": 0.5208633729441569, "eval_runtime": 231.3006, "eval_samples_per_second": 406.614, "eval_steps_per_second": 12.711, "step": 37800 }, { "epoch": 0.8048738995449326, "grad_norm": 0.3559422194957733, "learning_rate": 1.839029473057458e-05, "loss": 0.0242, "step": 37850 }, { "epoch": 0.8059371411559563, "grad_norm": 0.815377950668335, "learning_rate": 1.838816824735253e-05, "loss": 0.0293, "step": 37900 }, { "epoch": 0.8070003827669799, "grad_norm": 0.2311554104089737, "learning_rate": 1.8386041764130484e-05, "loss": 0.0196, "step": 37950 }, { "epoch": 0.8080636243780036, "grad_norm": 0.48889002203941345, "learning_rate": 1.8383915280908434e-05, "loss": 0.0245, "step": 38000 }, { "epoch": 0.8080636243780036, "eval_f1": 0.6509144431308047, "eval_loss": 0.022472331300377846, "eval_precision": 0.844074898981377, "eval_recall": 0.529697090232056, "eval_runtime": 231.2145, "eval_samples_per_second": 406.765, "eval_steps_per_second": 12.715, "step": 38000 }, { "epoch": 0.8091268659890274, "grad_norm": 0.24490977823734283, "learning_rate": 1.8381788797686388e-05, "loss": 0.0281, "step": 38050 }, { "epoch": 0.8101901076000511, "grad_norm": 1.3587714433670044, "learning_rate": 1.8379662314464338e-05, "loss": 0.0201, "step": 38100 }, { "epoch": 0.8112533492110747, "grad_norm": 0.6095528602600098, "learning_rate": 1.8377535831242292e-05, "loss": 0.0223, "step": 38150 }, { "epoch": 0.8123165908220984, "grad_norm": 2.431068181991577, "learning_rate": 1.8375409348020246e-05, "loss": 0.0245, "step": 38200 }, { "epoch": 0.8123165908220984, "eval_f1": 0.6561463409120419, "eval_loss": 0.024169327691197395, "eval_precision": 0.828522791434497, "eval_recall": 0.5431437868416537, "eval_runtime": 231.3127, "eval_samples_per_second": 406.592, "eval_steps_per_second": 12.71, "step": 38200 }, { "epoch": 0.8133798324331221, "grad_norm": 0.6159511804580688, "learning_rate": 1.83732828647982e-05, "loss": 0.0215, "step": 38250 }, { "epoch": 0.8144430740441458, "grad_norm": 0.23389458656311035, "learning_rate": 1.8371156381576153e-05, "loss": 0.0231, "step": 38300 }, { "epoch": 0.8155063156551695, "grad_norm": 0.2651449739933014, "learning_rate": 1.8369029898354103e-05, "loss": 0.0204, "step": 38350 }, { "epoch": 0.8165695572661932, "grad_norm": 0.23006793856620789, "learning_rate": 1.8366903415132057e-05, "loss": 0.0265, "step": 38400 }, { "epoch": 0.8165695572661932, "eval_f1": 0.6484995229101627, "eval_loss": 0.02327045612037182, "eval_precision": 0.8575101758975816, "eval_recall": 0.5214103010314002, "eval_runtime": 231.3029, "eval_samples_per_second": 406.61, "eval_steps_per_second": 12.711, "step": 38400 }, { "epoch": 0.8176327988772168, "grad_norm": 0.4539220631122589, "learning_rate": 1.8364776931910007e-05, "loss": 0.0247, "step": 38450 }, { "epoch": 0.8186960404882405, "grad_norm": 0.5614054799079895, "learning_rate": 1.836265044868796e-05, "loss": 0.0229, "step": 38500 }, { "epoch": 0.8197592820992642, "grad_norm": 0.4873792231082916, "learning_rate": 1.836052396546591e-05, "loss": 0.0193, "step": 38550 }, { "epoch": 0.820822523710288, "grad_norm": 2.0669424533843994, "learning_rate": 1.8358397482243865e-05, "loss": 0.0238, "step": 38600 }, { "epoch": 0.820822523710288, "eval_f1": 0.6599651586800758, "eval_loss": 0.02416382171213627, "eval_precision": 0.807864629170269, "eval_recall": 0.5578390263574833, "eval_runtime": 231.2557, "eval_samples_per_second": 406.693, "eval_steps_per_second": 12.713, "step": 38600 }, { "epoch": 0.8218857653213116, "grad_norm": 0.3125521242618561, "learning_rate": 1.835627099902182e-05, "loss": 0.0312, "step": 38650 }, { "epoch": 0.8229490069323353, "grad_norm": 0.7108232378959656, "learning_rate": 1.8354144515799773e-05, "loss": 0.0221, "step": 38700 }, { "epoch": 0.824012248543359, "grad_norm": 0.8708053827285767, "learning_rate": 1.8352018032577726e-05, "loss": 0.0265, "step": 38750 }, { "epoch": 0.8250754901543826, "grad_norm": 0.9917564988136292, "learning_rate": 1.8349891549355677e-05, "loss": 0.0232, "step": 38800 }, { "epoch": 0.8250754901543826, "eval_f1": 0.6534395712004725, "eval_loss": 0.023175733163952827, "eval_precision": 0.8487694643549973, "eval_recall": 0.5311942368143079, "eval_runtime": 231.3113, "eval_samples_per_second": 406.595, "eval_steps_per_second": 12.71, "step": 38800 }, { "epoch": 0.8261387317654064, "grad_norm": 0.6145575046539307, "learning_rate": 1.834776506613363e-05, "loss": 0.0257, "step": 38850 }, { "epoch": 0.8272019733764301, "grad_norm": 0.2410363107919693, "learning_rate": 1.834563858291158e-05, "loss": 0.0233, "step": 38900 }, { "epoch": 0.8282652149874538, "grad_norm": 0.38584986329078674, "learning_rate": 1.8343512099689535e-05, "loss": 0.0245, "step": 38950 }, { "epoch": 0.8293284565984774, "grad_norm": 0.2867968678474426, "learning_rate": 1.8341385616467488e-05, "loss": 0.0256, "step": 39000 }, { "epoch": 0.8293284565984774, "eval_f1": 0.6579357873802515, "eval_loss": 0.02392045222222805, "eval_precision": 0.8138175304318842, "eval_recall": 0.5521708625442348, "eval_runtime": 231.2492, "eval_samples_per_second": 406.704, "eval_steps_per_second": 12.714, "step": 39000 }, { "epoch": 0.8303916982095011, "grad_norm": 0.2975466847419739, "learning_rate": 1.833925913324544e-05, "loss": 0.0203, "step": 39050 }, { "epoch": 0.8314549398205248, "grad_norm": 2.017557382583618, "learning_rate": 1.8337132650023392e-05, "loss": 0.0227, "step": 39100 }, { "epoch": 0.8325181814315485, "grad_norm": 0.20542718470096588, "learning_rate": 1.8335006166801346e-05, "loss": 0.0221, "step": 39150 }, { "epoch": 0.8335814230425722, "grad_norm": 0.41681042313575745, "learning_rate": 1.83328796835793e-05, "loss": 0.0221, "step": 39200 }, { "epoch": 0.8335814230425722, "eval_f1": 0.6548787760472368, "eval_loss": 0.022886035963892937, "eval_precision": 0.8479555274527657, "eval_recall": 0.5334206208462174, "eval_runtime": 231.2703, "eval_samples_per_second": 406.667, "eval_steps_per_second": 12.712, "step": 39200 }, { "epoch": 0.8346446646535959, "grad_norm": 0.6221297383308411, "learning_rate": 1.833075320035725e-05, "loss": 0.0241, "step": 39250 }, { "epoch": 0.8357079062646195, "grad_norm": 0.33693426847457886, "learning_rate": 1.8328626717135204e-05, "loss": 0.0211, "step": 39300 }, { "epoch": 0.8367711478756432, "grad_norm": 0.37722811102867126, "learning_rate": 1.8326500233913158e-05, "loss": 0.0275, "step": 39350 }, { "epoch": 0.837834389486667, "grad_norm": 0.33600524067878723, "learning_rate": 1.8324373750691108e-05, "loss": 0.0258, "step": 39400 }, { "epoch": 0.837834389486667, "eval_f1": 0.6562279286490273, "eval_loss": 0.022317364811897278, "eval_precision": 0.8262999404446659, "eval_recall": 0.5442155449116051, "eval_runtime": 231.3657, "eval_samples_per_second": 406.499, "eval_steps_per_second": 12.707, "step": 39400 }, { "epoch": 0.8388976310976907, "grad_norm": 1.0509536266326904, "learning_rate": 1.832224726746906e-05, "loss": 0.0227, "step": 39450 }, { "epoch": 0.8399608727087143, "grad_norm": 0.3650446832180023, "learning_rate": 1.8320120784247012e-05, "loss": 0.0228, "step": 39500 }, { "epoch": 0.841024114319738, "grad_norm": 0.28059908747673035, "learning_rate": 1.8317994301024966e-05, "loss": 0.0245, "step": 39550 }, { "epoch": 0.8420873559307617, "grad_norm": 0.9687247276306152, "learning_rate": 1.831586781780292e-05, "loss": 0.0228, "step": 39600 }, { "epoch": 0.8420873559307617, "eval_f1": 0.646334930100777, "eval_loss": 0.02334713563323021, "eval_precision": 0.8357268656846465, "eval_recall": 0.5269237781129033, "eval_runtime": 231.263, "eval_samples_per_second": 406.68, "eval_steps_per_second": 12.713, "step": 39600 }, { "epoch": 0.8431505975417853, "grad_norm": 0.3260483741760254, "learning_rate": 1.8313741334580873e-05, "loss": 0.0226, "step": 39650 }, { "epoch": 0.8442138391528091, "grad_norm": 0.3404238820075989, "learning_rate": 1.8311614851358823e-05, "loss": 0.0239, "step": 39700 }, { "epoch": 0.8452770807638328, "grad_norm": 0.8154049515724182, "learning_rate": 1.8309488368136777e-05, "loss": 0.0247, "step": 39750 }, { "epoch": 0.8463403223748565, "grad_norm": 0.2282392531633377, "learning_rate": 1.830736188491473e-05, "loss": 0.0194, "step": 39800 }, { "epoch": 0.8463403223748565, "eval_f1": 0.6610501286782418, "eval_loss": 0.02362154610455036, "eval_precision": 0.8194052901232385, "eval_recall": 0.5539884316422452, "eval_runtime": 231.2557, "eval_samples_per_second": 406.693, "eval_steps_per_second": 12.713, "step": 39800 }, { "epoch": 0.8474035639858801, "grad_norm": 1.364546775817871, "learning_rate": 1.830523540169268e-05, "loss": 0.02, "step": 39850 }, { "epoch": 0.8484668055969038, "grad_norm": 0.5887126922607422, "learning_rate": 1.8303108918470635e-05, "loss": 0.0223, "step": 39900 }, { "epoch": 0.8495300472079276, "grad_norm": 0.5980216860771179, "learning_rate": 1.8300982435248585e-05, "loss": 0.0228, "step": 39950 }, { "epoch": 0.8505932888189512, "grad_norm": 0.3049355149269104, "learning_rate": 1.829885595202654e-05, "loss": 0.0267, "step": 40000 }, { "epoch": 0.8505932888189512, "eval_f1": 0.6523896269798831, "eval_loss": 0.022994186729192734, "eval_precision": 0.8219617457891131, "eval_recall": 0.5408179613393362, "eval_runtime": 231.2332, "eval_samples_per_second": 406.732, "eval_steps_per_second": 12.714, "step": 40000 }, { "epoch": 0.8516565304299749, "grad_norm": 0.794547975063324, "learning_rate": 1.8296729468804493e-05, "loss": 0.024, "step": 40050 }, { "epoch": 0.8527197720409986, "grad_norm": 0.645914614200592, "learning_rate": 1.8294602985582446e-05, "loss": 0.0244, "step": 40100 }, { "epoch": 0.8537830136520222, "grad_norm": 0.11788304150104523, "learning_rate": 1.82924765023604e-05, "loss": 0.022, "step": 40150 }, { "epoch": 0.854846255263046, "grad_norm": 0.3445088565349579, "learning_rate": 1.829035001913835e-05, "loss": 0.0246, "step": 40200 }, { "epoch": 0.854846255263046, "eval_f1": 0.6514743399742456, "eval_loss": 0.022903816774487495, "eval_precision": 0.8106301090477795, "eval_recall": 0.5445580655318989, "eval_runtime": 231.2831, "eval_samples_per_second": 406.644, "eval_steps_per_second": 12.712, "step": 40200 }, { "epoch": 0.8559094968740697, "grad_norm": 0.24715080857276917, "learning_rate": 1.8288223535916304e-05, "loss": 0.0237, "step": 40250 }, { "epoch": 0.8569727384850934, "grad_norm": 0.506866991519928, "learning_rate": 1.8286097052694254e-05, "loss": 0.0216, "step": 40300 }, { "epoch": 0.858035980096117, "grad_norm": 0.2001570463180542, "learning_rate": 1.8283970569472208e-05, "loss": 0.0256, "step": 40350 }, { "epoch": 0.8590992217071407, "grad_norm": 0.35063880681991577, "learning_rate": 1.828184408625016e-05, "loss": 0.0207, "step": 40400 }, { "epoch": 0.8590992217071407, "eval_f1": 0.6620040934345629, "eval_loss": 0.02380812168121338, "eval_precision": 0.8104295362160476, "eval_recall": 0.5595295313544171, "eval_runtime": 237.255, "eval_samples_per_second": 396.409, "eval_steps_per_second": 12.392, "step": 40400 }, { "epoch": 0.8601624633181644, "grad_norm": 1.0387039184570312, "learning_rate": 1.8279717603028112e-05, "loss": 0.0227, "step": 40450 }, { "epoch": 0.8612257049291882, "grad_norm": 1.0084713697433472, "learning_rate": 1.8277591119806066e-05, "loss": 0.0266, "step": 40500 }, { "epoch": 0.8622889465402118, "grad_norm": 0.4679969251155853, "learning_rate": 1.827546463658402e-05, "loss": 0.0227, "step": 40550 }, { "epoch": 0.8633521881512355, "grad_norm": 0.7763227224349976, "learning_rate": 1.8273338153361973e-05, "loss": 0.0243, "step": 40600 }, { "epoch": 0.8633521881512355, "eval_f1": 0.6564364373884587, "eval_loss": 0.022662492468953133, "eval_precision": 0.8274083098768368, "eval_recall": 0.5440221864969231, "eval_runtime": 236.7212, "eval_samples_per_second": 397.303, "eval_steps_per_second": 12.42, "step": 40600 }, { "epoch": 0.8644154297622592, "grad_norm": 0.34784257411956787, "learning_rate": 1.8271211670139924e-05, "loss": 0.0259, "step": 40650 }, { "epoch": 0.8654786713732828, "grad_norm": 0.8269448280334473, "learning_rate": 1.8269085186917877e-05, "loss": 0.0214, "step": 40700 }, { "epoch": 0.8665419129843066, "grad_norm": 0.2333746999502182, "learning_rate": 1.826695870369583e-05, "loss": 0.022, "step": 40750 }, { "epoch": 0.8676051545953303, "grad_norm": 1.5520468950271606, "learning_rate": 1.826483222047378e-05, "loss": 0.0233, "step": 40800 }, { "epoch": 0.8676051545953303, "eval_f1": 0.6581824568166047, "eval_loss": 0.023179808631539345, "eval_precision": 0.8316740630746983, "eval_recall": 0.5445801636364339, "eval_runtime": 236.8792, "eval_samples_per_second": 397.038, "eval_steps_per_second": 12.411, "step": 40800 }, { "epoch": 0.8686683962063539, "grad_norm": 0.32171201705932617, "learning_rate": 1.8262705737251735e-05, "loss": 0.0216, "step": 40850 }, { "epoch": 0.8697316378173776, "grad_norm": 1.1372419595718384, "learning_rate": 1.8260579254029686e-05, "loss": 0.0246, "step": 40900 }, { "epoch": 0.8707948794284013, "grad_norm": 0.29713359475135803, "learning_rate": 1.825845277080764e-05, "loss": 0.0247, "step": 40950 }, { "epoch": 0.871858121039425, "grad_norm": 0.6862639784812927, "learning_rate": 1.8256326287585593e-05, "loss": 0.0285, "step": 41000 }, { "epoch": 0.871858121039425, "eval_f1": 0.6597372063765305, "eval_loss": 0.02347043715417385, "eval_precision": 0.8249751243780411, "eval_recall": 0.5496461541011016, "eval_runtime": 236.9414, "eval_samples_per_second": 396.934, "eval_steps_per_second": 12.408, "step": 41000 }, { "epoch": 0.8729213626504487, "grad_norm": 0.3266887068748474, "learning_rate": 1.8254199804363547e-05, "loss": 0.0235, "step": 41050 }, { "epoch": 0.8739846042614724, "grad_norm": 0.4940395653247833, "learning_rate": 1.8252073321141497e-05, "loss": 0.0225, "step": 41100 }, { "epoch": 0.8750478458724961, "grad_norm": 0.39644426107406616, "learning_rate": 1.824994683791945e-05, "loss": 0.0274, "step": 41150 }, { "epoch": 0.8761110874835197, "grad_norm": 0.3752153217792511, "learning_rate": 1.8247820354697405e-05, "loss": 0.0249, "step": 41200 }, { "epoch": 0.8761110874835197, "eval_f1": 0.6557298195900589, "eval_loss": 0.022104868665337563, "eval_precision": 0.8467153922819284, "eval_recall": 0.535044831529546, "eval_runtime": 236.7416, "eval_samples_per_second": 397.269, "eval_steps_per_second": 12.419, "step": 41200 }, { "epoch": 0.8771743290945434, "grad_norm": 0.7290238738059998, "learning_rate": 1.8245693871475355e-05, "loss": 0.0227, "step": 41250 }, { "epoch": 0.8782375707055672, "grad_norm": 0.4507587254047394, "learning_rate": 1.824356738825331e-05, "loss": 0.0211, "step": 41300 }, { "epoch": 0.8793008123165909, "grad_norm": 0.7580509781837463, "learning_rate": 1.824144090503126e-05, "loss": 0.0222, "step": 41350 }, { "epoch": 0.8803640539276145, "grad_norm": 0.7605857253074646, "learning_rate": 1.8239314421809213e-05, "loss": 0.0206, "step": 41400 }, { "epoch": 0.8803640539276145, "eval_f1": 0.6573166416046268, "eval_loss": 0.023299356922507286, "eval_precision": 0.8284799704260629, "eval_recall": 0.5447679975249822, "eval_runtime": 236.7757, "eval_samples_per_second": 397.211, "eval_steps_per_second": 12.417, "step": 41400 }, { "epoch": 0.8814272955386382, "grad_norm": 0.331926167011261, "learning_rate": 1.8237187938587166e-05, "loss": 0.0222, "step": 41450 }, { "epoch": 0.8824905371496619, "grad_norm": 0.25403085350990295, "learning_rate": 1.823506145536512e-05, "loss": 0.0271, "step": 41500 }, { "epoch": 0.8835537787606855, "grad_norm": 1.7504284381866455, "learning_rate": 1.823293497214307e-05, "loss": 0.028, "step": 41550 }, { "epoch": 0.8846170203717093, "grad_norm": 1.002791404724121, "learning_rate": 1.8230808488921024e-05, "loss": 0.0269, "step": 41600 }, { "epoch": 0.8846170203717093, "eval_f1": 0.6457501339875444, "eval_loss": 0.022923072800040245, "eval_precision": 0.8579681749097382, "eval_recall": 0.5176978194695063, "eval_runtime": 236.9178, "eval_samples_per_second": 396.973, "eval_steps_per_second": 12.409, "step": 41600 }, { "epoch": 0.885680261982733, "grad_norm": 1.6206576824188232, "learning_rate": 1.8228682005698978e-05, "loss": 0.0191, "step": 41650 }, { "epoch": 0.8867435035937566, "grad_norm": 0.3704272210597992, "learning_rate": 1.8226555522476928e-05, "loss": 0.0241, "step": 41700 }, { "epoch": 0.8878067452047803, "grad_norm": 1.1722341775894165, "learning_rate": 1.8224429039254882e-05, "loss": 0.0237, "step": 41750 }, { "epoch": 0.888869986815804, "grad_norm": 0.5300278067588806, "learning_rate": 1.8222302556032832e-05, "loss": 0.0218, "step": 41800 }, { "epoch": 0.888869986815804, "eval_f1": 0.6462531600064145, "eval_loss": 0.02278389409184456, "eval_precision": 0.8450603678287352, "eval_recall": 0.523172624868073, "eval_runtime": 236.9365, "eval_samples_per_second": 396.942, "eval_steps_per_second": 12.408, "step": 41800 }, { "epoch": 0.8899332284268278, "grad_norm": 0.43678972125053406, "learning_rate": 1.8220176072810786e-05, "loss": 0.0192, "step": 41850 }, { "epoch": 0.8909964700378514, "grad_norm": 0.198232501745224, "learning_rate": 1.821804958958874e-05, "loss": 0.0228, "step": 41900 }, { "epoch": 0.8920597116488751, "grad_norm": 0.45339858531951904, "learning_rate": 1.8215923106366693e-05, "loss": 0.0227, "step": 41950 }, { "epoch": 0.8931229532598988, "grad_norm": 0.6429247856140137, "learning_rate": 1.8213796623144644e-05, "loss": 0.0193, "step": 42000 }, { "epoch": 0.8931229532598988, "eval_f1": 0.6542062967661291, "eval_loss": 0.02282370813190937, "eval_precision": 0.8409163882500247, "eval_recall": 0.5353431559407695, "eval_runtime": 237.0492, "eval_samples_per_second": 396.753, "eval_steps_per_second": 12.402, "step": 42000 }, { "epoch": 0.8941861948709224, "grad_norm": 0.447672963142395, "learning_rate": 1.8211670139922597e-05, "loss": 0.0202, "step": 42050 }, { "epoch": 0.8952494364819461, "grad_norm": 0.7601777911186218, "learning_rate": 1.820954365670055e-05, "loss": 0.026, "step": 42100 }, { "epoch": 0.8963126780929699, "grad_norm": 0.7301594614982605, "learning_rate": 1.8207417173478505e-05, "loss": 0.0214, "step": 42150 }, { "epoch": 0.8973759197039936, "grad_norm": 0.37412363290786743, "learning_rate": 1.8205290690256455e-05, "loss": 0.0226, "step": 42200 }, { "epoch": 0.8973759197039936, "eval_f1": 0.6577199204182457, "eval_loss": 0.022884555160999298, "eval_precision": 0.8089999112752314, "eval_recall": 0.5541044466910544, "eval_runtime": 237.3885, "eval_samples_per_second": 396.186, "eval_steps_per_second": 12.385, "step": 42200 }, { "epoch": 0.8984391613150172, "grad_norm": 1.3081921339035034, "learning_rate": 1.8203164207034406e-05, "loss": 0.0266, "step": 42250 }, { "epoch": 0.8995024029260409, "grad_norm": 0.6771134734153748, "learning_rate": 1.820103772381236e-05, "loss": 0.0263, "step": 42300 }, { "epoch": 0.9005656445370646, "grad_norm": 0.7873882055282593, "learning_rate": 1.8198911240590313e-05, "loss": 0.0243, "step": 42350 }, { "epoch": 0.9016288861480883, "grad_norm": 0.7412579655647278, "learning_rate": 1.8196784757368267e-05, "loss": 0.0249, "step": 42400 }, { "epoch": 0.9016288861480883, "eval_f1": 0.6484384079411407, "eval_loss": 0.021912800148129463, "eval_precision": 0.8575505048209401, "eval_recall": 0.5213163840871261, "eval_runtime": 236.5117, "eval_samples_per_second": 397.655, "eval_steps_per_second": 12.431, "step": 42400 }, { "epoch": 0.902692127759112, "grad_norm": 0.46650347113609314, "learning_rate": 1.8194658274146217e-05, "loss": 0.023, "step": 42450 }, { "epoch": 0.9037553693701357, "grad_norm": 0.5222067832946777, "learning_rate": 1.819253179092417e-05, "loss": 0.0286, "step": 42500 }, { "epoch": 0.9048186109811593, "grad_norm": 0.25706401467323303, "learning_rate": 1.8190405307702124e-05, "loss": 0.0201, "step": 42550 }, { "epoch": 0.905881852592183, "grad_norm": 0.303478479385376, "learning_rate": 1.8188278824480078e-05, "loss": 0.0242, "step": 42600 }, { "epoch": 0.905881852592183, "eval_f1": 0.6570165675865697, "eval_loss": 0.022312041372060776, "eval_precision": 0.8038369304555713, "eval_recall": 0.5555463480119686, "eval_runtime": 237.2726, "eval_samples_per_second": 396.379, "eval_steps_per_second": 12.391, "step": 42600 }, { "epoch": 0.9069450942032068, "grad_norm": 2.1913256645202637, "learning_rate": 1.818615234125803e-05, "loss": 0.0243, "step": 42650 }, { "epoch": 0.9080083358142305, "grad_norm": 0.40043526887893677, "learning_rate": 1.8184025858035982e-05, "loss": 0.0219, "step": 42700 }, { "epoch": 0.9090715774252541, "grad_norm": 2.970536947250366, "learning_rate": 1.8181899374813933e-05, "loss": 0.0232, "step": 42750 }, { "epoch": 0.9101348190362778, "grad_norm": 0.26423510909080505, "learning_rate": 1.8179772891591886e-05, "loss": 0.0212, "step": 42800 }, { "epoch": 0.9101348190362778, "eval_f1": 0.6611380045350335, "eval_loss": 0.023796144872903824, "eval_precision": 0.8161691905665533, "eval_recall": 0.5556015932733063, "eval_runtime": 237.1263, "eval_samples_per_second": 396.624, "eval_steps_per_second": 12.398, "step": 42800 }, { "epoch": 0.9111980606473015, "grad_norm": 0.6396982073783875, "learning_rate": 1.817764640836984e-05, "loss": 0.0241, "step": 42850 }, { "epoch": 0.9122613022583251, "grad_norm": 0.3591616451740265, "learning_rate": 1.8175519925147794e-05, "loss": 0.0256, "step": 42900 }, { "epoch": 0.9133245438693489, "grad_norm": 1.1731879711151123, "learning_rate": 1.8173393441925744e-05, "loss": 0.0316, "step": 42950 }, { "epoch": 0.9143877854803726, "grad_norm": 0.2453346848487854, "learning_rate": 1.8171266958703698e-05, "loss": 0.0286, "step": 43000 }, { "epoch": 0.9143877854803726, "eval_f1": 0.6490552401304656, "eval_loss": 0.02256168983876705, "eval_precision": 0.8185487940255645, "eval_recall": 0.5377131776521571, "eval_runtime": 236.8802, "eval_samples_per_second": 397.036, "eval_steps_per_second": 12.411, "step": 43000 }, { "epoch": 0.9154510270913963, "grad_norm": 0.45421102643013, "learning_rate": 1.816914047548165e-05, "loss": 0.0215, "step": 43050 }, { "epoch": 0.9165142687024199, "grad_norm": 0.6178030371665955, "learning_rate": 1.8167013992259602e-05, "loss": 0.0229, "step": 43100 }, { "epoch": 0.9175775103134436, "grad_norm": 2.494650363922119, "learning_rate": 1.8164887509037556e-05, "loss": 0.0256, "step": 43150 }, { "epoch": 0.9186407519244674, "grad_norm": 0.22184647619724274, "learning_rate": 1.8162761025815506e-05, "loss": 0.0178, "step": 43200 }, { "epoch": 0.9186407519244674, "eval_f1": 0.6598187459771917, "eval_loss": 0.0228362288326025, "eval_precision": 0.804801628641118, "eval_recall": 0.5590986183159831, "eval_runtime": 237.6936, "eval_samples_per_second": 395.677, "eval_steps_per_second": 12.369, "step": 43200 }, { "epoch": 0.919703993535491, "grad_norm": 0.39982524514198303, "learning_rate": 1.816063454259346e-05, "loss": 0.0193, "step": 43250 }, { "epoch": 0.9207672351465147, "grad_norm": 0.3705034554004669, "learning_rate": 1.8158508059371413e-05, "loss": 0.0281, "step": 43300 }, { "epoch": 0.9218304767575384, "grad_norm": 0.26773950457572937, "learning_rate": 1.8156381576149367e-05, "loss": 0.0222, "step": 43350 }, { "epoch": 0.922893718368562, "grad_norm": 1.89653480052948, "learning_rate": 1.8154255092927317e-05, "loss": 0.0289, "step": 43400 }, { "epoch": 0.922893718368562, "eval_f1": 0.6543795657776447, "eval_loss": 0.02191690169274807, "eval_precision": 0.8254227307141562, "eval_recall": 0.5420554551933008, "eval_runtime": 236.6049, "eval_samples_per_second": 397.498, "eval_steps_per_second": 12.426, "step": 43400 }, { "epoch": 0.9239569599795857, "grad_norm": 1.4067548513412476, "learning_rate": 1.815212860970527e-05, "loss": 0.0271, "step": 43450 }, { "epoch": 0.9250202015906095, "grad_norm": 0.4713330864906311, "learning_rate": 1.8150002126483225e-05, "loss": 0.0202, "step": 43500 }, { "epoch": 0.9260834432016332, "grad_norm": 0.4779347777366638, "learning_rate": 1.814787564326118e-05, "loss": 0.0265, "step": 43550 }, { "epoch": 0.9271466848126568, "grad_norm": 0.7022972702980042, "learning_rate": 1.814574916003913e-05, "loss": 0.0221, "step": 43600 }, { "epoch": 0.9271466848126568, "eval_f1": 0.661162365336841, "eval_loss": 0.022343920543789864, "eval_precision": 0.8209629174348846, "eval_recall": 0.5534359790288682, "eval_runtime": 235.5592, "eval_samples_per_second": 399.263, "eval_steps_per_second": 12.481, "step": 43600 }, { "epoch": 0.9282099264236805, "grad_norm": 0.5318084359169006, "learning_rate": 1.814362267681708e-05, "loss": 0.0229, "step": 43650 }, { "epoch": 0.9292731680347042, "grad_norm": 0.6239136457443237, "learning_rate": 1.8141496193595033e-05, "loss": 0.0241, "step": 43700 }, { "epoch": 0.930336409645728, "grad_norm": 0.33184027671813965, "learning_rate": 1.8139369710372987e-05, "loss": 0.0295, "step": 43750 }, { "epoch": 0.9313996512567516, "grad_norm": 0.5624176263809204, "learning_rate": 1.813724322715094e-05, "loss": 0.0263, "step": 43800 }, { "epoch": 0.9313996512567516, "eval_f1": 0.6510627814233759, "eval_loss": 0.02256704308092594, "eval_precision": 0.8306729532789686, "eval_recall": 0.5353155333101007, "eval_runtime": 235.5983, "eval_samples_per_second": 399.196, "eval_steps_per_second": 12.479, "step": 43800 }, { "epoch": 0.9324628928677753, "grad_norm": 4.972071647644043, "learning_rate": 1.813511674392889e-05, "loss": 0.0259, "step": 43850 }, { "epoch": 0.933526134478799, "grad_norm": 0.338680624961853, "learning_rate": 1.8132990260706844e-05, "loss": 0.0223, "step": 43900 }, { "epoch": 0.9345893760898226, "grad_norm": 0.7072756290435791, "learning_rate": 1.8130863777484798e-05, "loss": 0.0193, "step": 43950 }, { "epoch": 0.9356526177008463, "grad_norm": 0.97054523229599, "learning_rate": 1.8128737294262752e-05, "loss": 0.0266, "step": 44000 }, { "epoch": 0.9356526177008463, "eval_f1": 0.6588523591957348, "eval_loss": 0.02238084003329277, "eval_precision": 0.8225846469390788, "eval_recall": 0.5494804183170885, "eval_runtime": 235.598, "eval_samples_per_second": 399.197, "eval_steps_per_second": 12.479, "step": 44000 }, { "epoch": 0.9367158593118701, "grad_norm": 0.5495061278343201, "learning_rate": 1.8126610811040702e-05, "loss": 0.0218, "step": 44050 }, { "epoch": 0.9377791009228937, "grad_norm": 0.5108076333999634, "learning_rate": 1.8124484327818656e-05, "loss": 0.0232, "step": 44100 }, { "epoch": 0.9388423425339174, "grad_norm": 0.3104453980922699, "learning_rate": 1.8122357844596606e-05, "loss": 0.0181, "step": 44150 }, { "epoch": 0.9399055841449411, "grad_norm": 0.8615353107452393, "learning_rate": 1.812023136137456e-05, "loss": 0.0208, "step": 44200 }, { "epoch": 0.9399055841449411, "eval_f1": 0.6524520208488427, "eval_loss": 0.02254379726946354, "eval_precision": 0.8463984637202944, "eval_recall": 0.5308185690372115, "eval_runtime": 235.3597, "eval_samples_per_second": 399.601, "eval_steps_per_second": 12.492, "step": 44200 }, { "epoch": 0.9409688257559647, "grad_norm": 0.3588934540748596, "learning_rate": 1.8118104878152514e-05, "loss": 0.0227, "step": 44250 }, { "epoch": 0.9420320673669885, "grad_norm": 0.38570132851600647, "learning_rate": 1.8115978394930464e-05, "loss": 0.0242, "step": 44300 }, { "epoch": 0.9430953089780122, "grad_norm": 0.5387710928916931, "learning_rate": 1.8113851911708418e-05, "loss": 0.0227, "step": 44350 }, { "epoch": 0.9441585505890359, "grad_norm": 0.2592546343803406, "learning_rate": 1.811172542848637e-05, "loss": 0.0268, "step": 44400 }, { "epoch": 0.9441585505890359, "eval_f1": 0.6542119310759695, "eval_loss": 0.021951448172330856, "eval_precision": 0.8448912130963302, "eval_recall": 0.5337520924142436, "eval_runtime": 235.6013, "eval_samples_per_second": 399.191, "eval_steps_per_second": 12.479, "step": 44400 }, { "epoch": 0.9452217922000595, "grad_norm": 0.49792754650115967, "learning_rate": 1.8109598945264325e-05, "loss": 0.0246, "step": 44450 }, { "epoch": 0.9462850338110832, "grad_norm": 3.6265859603881836, "learning_rate": 1.8107472462042276e-05, "loss": 0.0242, "step": 44500 }, { "epoch": 0.9473482754221069, "grad_norm": 0.6745219826698303, "learning_rate": 1.810534597882023e-05, "loss": 0.0202, "step": 44550 }, { "epoch": 0.9484115170331306, "grad_norm": 0.861034095287323, "learning_rate": 1.810321949559818e-05, "loss": 0.0197, "step": 44600 }, { "epoch": 0.9484115170331306, "eval_f1": 0.6622999363105806, "eval_loss": 0.02339394949376583, "eval_precision": 0.8254133168418202, "eval_recall": 0.5530161150427017, "eval_runtime": 235.5056, "eval_samples_per_second": 399.354, "eval_steps_per_second": 12.484, "step": 44600 }, { "epoch": 0.9494747586441543, "grad_norm": 0.32888713479042053, "learning_rate": 1.8101093012376133e-05, "loss": 0.0232, "step": 44650 }, { "epoch": 0.950538000255178, "grad_norm": 0.32498815655708313, "learning_rate": 1.8098966529154087e-05, "loss": 0.0248, "step": 44700 }, { "epoch": 0.9516012418662017, "grad_norm": 0.3865691125392914, "learning_rate": 1.8096840045932037e-05, "loss": 0.0282, "step": 44750 }, { "epoch": 0.9526644834772253, "grad_norm": 0.9221877455711365, "learning_rate": 1.809471356270999e-05, "loss": 0.0247, "step": 44800 }, { "epoch": 0.9526644834772253, "eval_f1": 0.6571331762494425, "eval_loss": 0.022035665810108185, "eval_precision": 0.8234711624074607, "eval_recall": 0.5467015816718018, "eval_runtime": 235.4539, "eval_samples_per_second": 399.441, "eval_steps_per_second": 12.487, "step": 44800 }, { "epoch": 0.9537277250882491, "grad_norm": 1.4794731140136719, "learning_rate": 1.8092587079487945e-05, "loss": 0.0263, "step": 44850 }, { "epoch": 0.9547909666992728, "grad_norm": 0.7216793298721313, "learning_rate": 1.80904605962659e-05, "loss": 0.0211, "step": 44900 }, { "epoch": 0.9558542083102964, "grad_norm": 1.0170994997024536, "learning_rate": 1.8088334113043852e-05, "loss": 0.0212, "step": 44950 }, { "epoch": 0.9569174499213201, "grad_norm": 0.30348825454711914, "learning_rate": 1.8086207629821803e-05, "loss": 0.0224, "step": 45000 }, { "epoch": 0.9569174499213201, "eval_f1": 0.6560856357320413, "eval_loss": 0.021956317126750946, "eval_precision": 0.8230028007467972, "eval_recall": 0.5454585632917035, "eval_runtime": 235.7355, "eval_samples_per_second": 398.964, "eval_steps_per_second": 12.472, "step": 45000 }, { "epoch": 0.9579806915323438, "grad_norm": 0.80409836769104, "learning_rate": 1.8084081146599753e-05, "loss": 0.027, "step": 45050 }, { "epoch": 0.9590439331433674, "grad_norm": 0.6965144872665405, "learning_rate": 1.8081954663377707e-05, "loss": 0.0224, "step": 45100 }, { "epoch": 0.9601071747543912, "grad_norm": 1.5917588472366333, "learning_rate": 1.807982818015566e-05, "loss": 0.0207, "step": 45150 }, { "epoch": 0.9611704163654149, "grad_norm": 0.7885081171989441, "learning_rate": 1.807770169693361e-05, "loss": 0.0257, "step": 45200 }, { "epoch": 0.9611704163654149, "eval_f1": 0.6485560218694709, "eval_loss": 0.022412916645407677, "eval_precision": 0.8540997219914743, "eval_recall": 0.5227527608819065, "eval_runtime": 235.3295, "eval_samples_per_second": 399.652, "eval_steps_per_second": 12.493, "step": 45200 }, { "epoch": 0.9622336579764386, "grad_norm": 0.9407171010971069, "learning_rate": 1.8075575213711564e-05, "loss": 0.0221, "step": 45250 }, { "epoch": 0.9632968995874622, "grad_norm": 0.6175448298454285, "learning_rate": 1.8073448730489518e-05, "loss": 0.0242, "step": 45300 }, { "epoch": 0.9643601411984859, "grad_norm": 0.2607942521572113, "learning_rate": 1.8071322247267472e-05, "loss": 0.0214, "step": 45350 }, { "epoch": 0.9654233828095097, "grad_norm": 0.3810420036315918, "learning_rate": 1.8069195764045426e-05, "loss": 0.0217, "step": 45400 }, { "epoch": 0.9654233828095097, "eval_f1": 0.6541095273533062, "eval_loss": 0.023274360224604607, "eval_precision": 0.8205568522840263, "eval_recall": 0.5438012054515723, "eval_runtime": 235.2746, "eval_samples_per_second": 399.746, "eval_steps_per_second": 12.496, "step": 45400 }, { "epoch": 0.9664866244205333, "grad_norm": 0.40521523356437683, "learning_rate": 1.8067069280823376e-05, "loss": 0.025, "step": 45450 }, { "epoch": 0.967549866031557, "grad_norm": 0.4032803475856781, "learning_rate": 1.806494279760133e-05, "loss": 0.0254, "step": 45500 }, { "epoch": 0.9686131076425807, "grad_norm": 0.38275355100631714, "learning_rate": 1.806281631437928e-05, "loss": 0.0272, "step": 45550 }, { "epoch": 0.9696763492536044, "grad_norm": 0.35086342692375183, "learning_rate": 1.8060689831157234e-05, "loss": 0.022, "step": 45600 }, { "epoch": 0.9696763492536044, "eval_f1": 0.6584672873484646, "eval_loss": 0.022640280425548553, "eval_precision": 0.8105791986042363, "eval_recall": 0.5544248692068131, "eval_runtime": 235.2399, "eval_samples_per_second": 399.805, "eval_steps_per_second": 12.498, "step": 45600 }, { "epoch": 0.9707395908646281, "grad_norm": 0.3370625376701355, "learning_rate": 1.8058563347935184e-05, "loss": 0.0261, "step": 45650 }, { "epoch": 0.9718028324756518, "grad_norm": 0.23653966188430786, "learning_rate": 1.8056436864713138e-05, "loss": 0.0213, "step": 45700 }, { "epoch": 0.9728660740866755, "grad_norm": 0.4711679518222809, "learning_rate": 1.805431038149109e-05, "loss": 0.0197, "step": 45750 }, { "epoch": 0.9739293156976991, "grad_norm": 0.7459168434143066, "learning_rate": 1.8052183898269045e-05, "loss": 0.0217, "step": 45800 }, { "epoch": 0.9739293156976991, "eval_f1": 0.6639520151813016, "eval_loss": 0.0223930012434721, "eval_precision": 0.8233918319948297, "eval_recall": 0.5562424383048237, "eval_runtime": 235.3806, "eval_samples_per_second": 399.566, "eval_steps_per_second": 12.49, "step": 45800 }, { "epoch": 0.9749925573087228, "grad_norm": 0.24484111368656158, "learning_rate": 1.8050057415047e-05, "loss": 0.0218, "step": 45850 }, { "epoch": 0.9760557989197465, "grad_norm": 0.676565408706665, "learning_rate": 1.804793093182495e-05, "loss": 0.0269, "step": 45900 }, { "epoch": 0.9771190405307703, "grad_norm": 0.404123455286026, "learning_rate": 1.8045804448602903e-05, "loss": 0.0257, "step": 45950 }, { "epoch": 0.9781822821417939, "grad_norm": 0.48964717984199524, "learning_rate": 1.8043677965380853e-05, "loss": 0.0263, "step": 46000 }, { "epoch": 0.9781822821417939, "eval_f1": 0.6559616893358394, "eval_loss": 0.021852504462003708, "eval_precision": 0.8417727544158027, "eval_recall": 0.5373485589273282, "eval_runtime": 235.4602, "eval_samples_per_second": 399.431, "eval_steps_per_second": 12.486, "step": 46000 }, { "epoch": 0.9792455237528176, "grad_norm": 0.26170140504837036, "learning_rate": 1.8041551482158807e-05, "loss": 0.0204, "step": 46050 }, { "epoch": 0.9803087653638413, "grad_norm": 0.6679876446723938, "learning_rate": 1.803942499893676e-05, "loss": 0.0251, "step": 46100 }, { "epoch": 0.9813720069748649, "grad_norm": 1.5096321105957031, "learning_rate": 1.803729851571471e-05, "loss": 0.02, "step": 46150 }, { "epoch": 0.9824352485858887, "grad_norm": 0.361203134059906, "learning_rate": 1.8035172032492665e-05, "loss": 0.02, "step": 46200 }, { "epoch": 0.9824352485858887, "eval_f1": 0.6515427569135667, "eval_loss": 0.022440824657678604, "eval_precision": 0.8573373117105949, "eval_recall": 0.5254211070045176, "eval_runtime": 235.2988, "eval_samples_per_second": 399.705, "eval_steps_per_second": 12.495, "step": 46200 }, { "epoch": 0.9834984901969124, "grad_norm": 1.0130541324615479, "learning_rate": 1.803304554927062e-05, "loss": 0.0269, "step": 46250 }, { "epoch": 0.984561731807936, "grad_norm": 0.17640730738639832, "learning_rate": 1.8030919066048572e-05, "loss": 0.0232, "step": 46300 }, { "epoch": 0.9856249734189597, "grad_norm": 1.032755970954895, "learning_rate": 1.8028792582826523e-05, "loss": 0.0218, "step": 46350 }, { "epoch": 0.9866882150299834, "grad_norm": 0.632682204246521, "learning_rate": 1.8026666099604476e-05, "loss": 0.024, "step": 46400 }, { "epoch": 0.9866882150299834, "eval_f1": 0.6583974217592286, "eval_loss": 0.022302648052573204, "eval_precision": 0.8210444644374617, "eval_recall": 0.5495356635784262, "eval_runtime": 235.175, "eval_samples_per_second": 399.915, "eval_steps_per_second": 12.501, "step": 46400 }, { "epoch": 0.987751456641007, "grad_norm": 0.3567107915878296, "learning_rate": 1.8024539616382427e-05, "loss": 0.028, "step": 46450 }, { "epoch": 0.9888146982520308, "grad_norm": 0.440664678812027, "learning_rate": 1.802241313316038e-05, "loss": 0.0246, "step": 46500 }, { "epoch": 0.9898779398630545, "grad_norm": 2.398423910140991, "learning_rate": 1.8020286649938334e-05, "loss": 0.0209, "step": 46550 }, { "epoch": 0.9909411814740782, "grad_norm": 0.5326344966888428, "learning_rate": 1.8018160166716284e-05, "loss": 0.0304, "step": 46600 }, { "epoch": 0.9909411814740782, "eval_f1": 0.6622337126919737, "eval_loss": 0.02229255996644497, "eval_precision": 0.8235158804488241, "eval_recall": 0.5537784996491619, "eval_runtime": 235.422, "eval_samples_per_second": 399.495, "eval_steps_per_second": 12.488, "step": 46600 }, { "epoch": 0.9920044230851018, "grad_norm": 0.22957830131053925, "learning_rate": 1.8016033683494238e-05, "loss": 0.0245, "step": 46650 }, { "epoch": 0.9930676646961255, "grad_norm": 1.9329959154129028, "learning_rate": 1.8013907200272192e-05, "loss": 0.02, "step": 46700 }, { "epoch": 0.9941309063071493, "grad_norm": 0.5953235626220703, "learning_rate": 1.8011780717050146e-05, "loss": 0.0213, "step": 46750 }, { "epoch": 0.995194147918173, "grad_norm": 0.21336813271045685, "learning_rate": 1.8009654233828096e-05, "loss": 0.0223, "step": 46800 }, { "epoch": 0.995194147918173, "eval_f1": 0.6590354515939895, "eval_loss": 0.022227203473448753, "eval_precision": 0.8189516526076723, "eval_recall": 0.551369806254838, "eval_runtime": 236.7868, "eval_samples_per_second": 397.193, "eval_steps_per_second": 12.416, "step": 46800 }, { "epoch": 0.9962573895291966, "grad_norm": 0.6221199035644531, "learning_rate": 1.800752775060605e-05, "loss": 0.0276, "step": 46850 }, { "epoch": 0.9973206311402203, "grad_norm": 0.3366697132587433, "learning_rate": 1.8005401267384003e-05, "loss": 0.0219, "step": 46900 }, { "epoch": 0.998383872751244, "grad_norm": 0.5205787420272827, "learning_rate": 1.8003274784161954e-05, "loss": 0.0178, "step": 46950 }, { "epoch": 0.9994471143622676, "grad_norm": 0.3288123309612274, "learning_rate": 1.8001148300939907e-05, "loss": 0.0243, "step": 47000 }, { "epoch": 0.9994471143622676, "eval_f1": 0.6471421968667042, "eval_loss": 0.02196415700018406, "eval_precision": 0.8410464376589588, "eval_recall": 0.5258962162520219, "eval_runtime": 236.7955, "eval_samples_per_second": 397.178, "eval_steps_per_second": 12.416, "step": 47000 }, { "epoch": 1.0005103559732913, "grad_norm": 0.28049805760383606, "learning_rate": 1.7999021817717858e-05, "loss": 0.0173, "step": 47050 }, { "epoch": 1.001573597584315, "grad_norm": 0.49257463216781616, "learning_rate": 1.799689533449581e-05, "loss": 0.0226, "step": 47100 }, { "epoch": 1.0026368391953389, "grad_norm": 0.6036893725395203, "learning_rate": 1.7994768851273765e-05, "loss": 0.0229, "step": 47150 }, { "epoch": 1.0037000808063625, "grad_norm": 0.9075455069541931, "learning_rate": 1.799264236805172e-05, "loss": 0.0222, "step": 47200 }, { "epoch": 1.0037000808063625, "eval_f1": 0.661504979078248, "eval_loss": 0.022889547049999237, "eval_precision": 0.8148621330960771, "eval_recall": 0.5567285966045954, "eval_runtime": 236.2209, "eval_samples_per_second": 398.144, "eval_steps_per_second": 12.446, "step": 47200 }, { "epoch": 1.0047633224173862, "grad_norm": 0.3315422236919403, "learning_rate": 1.7990515884829673e-05, "loss": 0.0218, "step": 47250 }, { "epoch": 1.0058265640284099, "grad_norm": 0.6378653049468994, "learning_rate": 1.7988389401607623e-05, "loss": 0.0237, "step": 47300 }, { "epoch": 1.0068898056394335, "grad_norm": 0.36265480518341064, "learning_rate": 1.7986262918385577e-05, "loss": 0.0177, "step": 47350 }, { "epoch": 1.0079530472504572, "grad_norm": 0.6631176471710205, "learning_rate": 1.7984136435163527e-05, "loss": 0.022, "step": 47400 }, { "epoch": 1.0079530472504572, "eval_f1": 0.6525584116732397, "eval_loss": 0.023879051208496094, "eval_precision": 0.8269457372226184, "eval_recall": 0.5389119998231854, "eval_runtime": 236.3991, "eval_samples_per_second": 397.844, "eval_steps_per_second": 12.437, "step": 47400 }, { "epoch": 1.0090162888614809, "grad_norm": 0.5721559524536133, "learning_rate": 1.798200995194148e-05, "loss": 0.0188, "step": 47450 }, { "epoch": 1.0100795304725045, "grad_norm": 0.575474202632904, "learning_rate": 1.797988346871943e-05, "loss": 0.0187, "step": 47500 }, { "epoch": 1.0111427720835282, "grad_norm": 2.4649784564971924, "learning_rate": 1.7977756985497385e-05, "loss": 0.0215, "step": 47550 }, { "epoch": 1.0122060136945519, "grad_norm": 0.19339551031589508, "learning_rate": 1.797563050227534e-05, "loss": 0.0221, "step": 47600 }, { "epoch": 1.0122060136945519, "eval_f1": 0.6617532019904852, "eval_loss": 0.023198522627353668, "eval_precision": 0.8319668016464616, "eval_recall": 0.5493588787421455, "eval_runtime": 236.2909, "eval_samples_per_second": 398.026, "eval_steps_per_second": 12.442, "step": 47600 }, { "epoch": 1.0132692553055755, "grad_norm": 0.22632229328155518, "learning_rate": 1.7973504019053292e-05, "loss": 0.0231, "step": 47650 }, { "epoch": 1.0143324969165994, "grad_norm": 0.4205799102783203, "learning_rate": 1.7971377535831246e-05, "loss": 0.0211, "step": 47700 }, { "epoch": 1.015395738527623, "grad_norm": 0.7382022738456726, "learning_rate": 1.7969251052609196e-05, "loss": 0.0189, "step": 47750 }, { "epoch": 1.0164589801386468, "grad_norm": 0.4753507077693939, "learning_rate": 1.796712456938715e-05, "loss": 0.021, "step": 47800 }, { "epoch": 1.0164589801386468, "eval_f1": 0.6592387328767602, "eval_loss": 0.022166311740875244, "eval_precision": 0.8358490085778302, "eval_recall": 0.544243167542274, "eval_runtime": 236.6674, "eval_samples_per_second": 397.393, "eval_steps_per_second": 12.422, "step": 47800 }, { "epoch": 1.0175222217496704, "grad_norm": 0.32036691904067993, "learning_rate": 1.79649980861651e-05, "loss": 0.0209, "step": 47850 }, { "epoch": 1.018585463360694, "grad_norm": 2.0262179374694824, "learning_rate": 1.7962871602943054e-05, "loss": 0.0276, "step": 47900 }, { "epoch": 1.0196487049717178, "grad_norm": 0.33548668026924133, "learning_rate": 1.7960745119721004e-05, "loss": 0.0196, "step": 47950 }, { "epoch": 1.0207119465827414, "grad_norm": 0.35151249170303345, "learning_rate": 1.7958618636498958e-05, "loss": 0.0202, "step": 48000 }, { "epoch": 1.0207119465827414, "eval_f1": 0.6597830587512024, "eval_loss": 0.022238589823246002, "eval_precision": 0.8271310922549711, "eval_recall": 0.5487567053935645, "eval_runtime": 236.3728, "eval_samples_per_second": 397.888, "eval_steps_per_second": 12.438, "step": 48000 }, { "epoch": 1.0217751881937651, "grad_norm": 1.8651045560836792, "learning_rate": 1.7956492153276912e-05, "loss": 0.0243, "step": 48050 }, { "epoch": 1.0228384298047888, "grad_norm": 1.58870267868042, "learning_rate": 1.7954365670054865e-05, "loss": 0.0237, "step": 48100 }, { "epoch": 1.0239016714158125, "grad_norm": 0.5927442908287048, "learning_rate": 1.795223918683282e-05, "loss": 0.0212, "step": 48150 }, { "epoch": 1.0249649130268361, "grad_norm": 0.40839749574661255, "learning_rate": 1.795011270361077e-05, "loss": 0.018, "step": 48200 }, { "epoch": 1.0249649130268361, "eval_f1": 0.6626417590121347, "eval_loss": 0.02345540188252926, "eval_precision": 0.8244973479708215, "eval_recall": 0.5539055637502387, "eval_runtime": 236.4266, "eval_samples_per_second": 397.798, "eval_steps_per_second": 12.435, "step": 48200 }, { "epoch": 1.02602815463786, "grad_norm": 0.24056392908096313, "learning_rate": 1.7947986220388723e-05, "loss": 0.0214, "step": 48250 }, { "epoch": 1.0270913962488837, "grad_norm": 0.14323708415031433, "learning_rate": 1.7945859737166677e-05, "loss": 0.0172, "step": 48300 }, { "epoch": 1.0281546378599073, "grad_norm": 0.4786272644996643, "learning_rate": 1.7943733253944627e-05, "loss": 0.0209, "step": 48350 }, { "epoch": 1.029217879470931, "grad_norm": 0.5297778248786926, "learning_rate": 1.794160677072258e-05, "loss": 0.0226, "step": 48400 }, { "epoch": 1.029217879470931, "eval_f1": 0.6605205954052104, "eval_loss": 0.022209422662854195, "eval_precision": 0.8204793885867778, "eval_recall": 0.5527564623144144, "eval_runtime": 236.8115, "eval_samples_per_second": 397.151, "eval_steps_per_second": 12.415, "step": 48400 }, { "epoch": 1.0302811210819547, "grad_norm": 0.38406991958618164, "learning_rate": 1.793948028750053e-05, "loss": 0.0201, "step": 48450 }, { "epoch": 1.0313443626929784, "grad_norm": 0.9394638538360596, "learning_rate": 1.7937353804278485e-05, "loss": 0.025, "step": 48500 }, { "epoch": 1.032407604304002, "grad_norm": 0.7214770913124084, "learning_rate": 1.793522732105644e-05, "loss": 0.0241, "step": 48550 }, { "epoch": 1.0334708459150257, "grad_norm": 1.1211806535720825, "learning_rate": 1.7933100837834393e-05, "loss": 0.0225, "step": 48600 }, { "epoch": 1.0334708459150257, "eval_f1": 0.6585886369378436, "eval_loss": 0.021977972239255905, "eval_precision": 0.853541838906502, "eval_recall": 0.5361331631778987, "eval_runtime": 237.1425, "eval_samples_per_second": 396.597, "eval_steps_per_second": 12.398, "step": 48600 }, { "epoch": 1.0345340875260494, "grad_norm": 1.0048131942749023, "learning_rate": 1.7930974354612343e-05, "loss": 0.0207, "step": 48650 }, { "epoch": 1.035597329137073, "grad_norm": 1.5988162755966187, "learning_rate": 1.7928847871390297e-05, "loss": 0.0182, "step": 48700 }, { "epoch": 1.0366605707480967, "grad_norm": 0.35983726382255554, "learning_rate": 1.792672138816825e-05, "loss": 0.0199, "step": 48750 }, { "epoch": 1.0377238123591206, "grad_norm": 0.17894881963729858, "learning_rate": 1.79245949049462e-05, "loss": 0.0167, "step": 48800 }, { "epoch": 1.0377238123591206, "eval_f1": 0.6532870576221719, "eval_loss": 0.0220212172716856, "eval_precision": 0.8334847757508543, "eval_recall": 0.5371552005126463, "eval_runtime": 237.0638, "eval_samples_per_second": 396.729, "eval_steps_per_second": 12.402, "step": 48800 }, { "epoch": 1.0387870539701443, "grad_norm": 0.5063978433609009, "learning_rate": 1.7922468421724154e-05, "loss": 0.0199, "step": 48850 }, { "epoch": 1.039850295581168, "grad_norm": 0.3937112092971802, "learning_rate": 1.7920341938502105e-05, "loss": 0.0208, "step": 48900 }, { "epoch": 1.0409135371921916, "grad_norm": 1.2546190023422241, "learning_rate": 1.791821545528006e-05, "loss": 0.0197, "step": 48950 }, { "epoch": 1.0419767788032153, "grad_norm": 0.23015424609184265, "learning_rate": 1.7916088972058012e-05, "loss": 0.0209, "step": 49000 }, { "epoch": 1.0419767788032153, "eval_f1": 0.6575648389615183, "eval_loss": 0.021588977426290512, "eval_precision": 0.8527495553872222, "eval_recall": 0.5350890277386161, "eval_runtime": 237.0116, "eval_samples_per_second": 396.816, "eval_steps_per_second": 12.404, "step": 49000 }, { "epoch": 1.043040020414239, "grad_norm": 0.29421061277389526, "learning_rate": 1.7913962488835966e-05, "loss": 0.0194, "step": 49050 }, { "epoch": 1.0441032620252626, "grad_norm": 0.20304490625858307, "learning_rate": 1.7911836005613916e-05, "loss": 0.0262, "step": 49100 }, { "epoch": 1.0451665036362863, "grad_norm": 0.1389387845993042, "learning_rate": 1.790970952239187e-05, "loss": 0.022, "step": 49150 }, { "epoch": 1.04622974524731, "grad_norm": 0.21145908534526825, "learning_rate": 1.7907583039169824e-05, "loss": 0.0173, "step": 49200 }, { "epoch": 1.04622974524731, "eval_f1": 0.6577262196524455, "eval_loss": 0.02163584902882576, "eval_precision": 0.8518215920371037, "eval_recall": 0.535669102982662, "eval_runtime": 236.8698, "eval_samples_per_second": 397.054, "eval_steps_per_second": 12.412, "step": 49200 }, { "epoch": 1.0472929868583336, "grad_norm": 1.0299135446548462, "learning_rate": 1.7905456555947774e-05, "loss": 0.0221, "step": 49250 }, { "epoch": 1.0483562284693573, "grad_norm": 0.25395411252975464, "learning_rate": 1.7903330072725728e-05, "loss": 0.0208, "step": 49300 }, { "epoch": 1.0494194700803812, "grad_norm": 0.7397930026054382, "learning_rate": 1.7901203589503678e-05, "loss": 0.0202, "step": 49350 }, { "epoch": 1.0504827116914048, "grad_norm": 0.3872850835323334, "learning_rate": 1.7899077106281632e-05, "loss": 0.0184, "step": 49400 }, { "epoch": 1.0504827116914048, "eval_f1": 0.658427290830412, "eval_loss": 0.021826863288879395, "eval_precision": 0.8591118462002829, "eval_recall": 0.5337465678881099, "eval_runtime": 237.3688, "eval_samples_per_second": 396.219, "eval_steps_per_second": 12.386, "step": 49400 }, { "epoch": 1.0515459533024285, "grad_norm": 0.24045945703983307, "learning_rate": 1.7896950623059585e-05, "loss": 0.0199, "step": 49450 }, { "epoch": 1.0526091949134522, "grad_norm": 1.3590166568756104, "learning_rate": 1.789482413983754e-05, "loss": 0.0242, "step": 49500 }, { "epoch": 1.0536724365244758, "grad_norm": 0.45266029238700867, "learning_rate": 1.789269765661549e-05, "loss": 0.0197, "step": 49550 }, { "epoch": 1.0547356781354995, "grad_norm": 0.2791209816932678, "learning_rate": 1.7890571173393443e-05, "loss": 0.021, "step": 49600 }, { "epoch": 1.0547356781354995, "eval_f1": 0.6601622051324415, "eval_loss": 0.021741166710853577, "eval_precision": 0.8370198793493321, "eval_recall": 0.5450055521487343, "eval_runtime": 237.3153, "eval_samples_per_second": 396.308, "eval_steps_per_second": 12.389, "step": 49600 }, { "epoch": 1.0557989197465232, "grad_norm": 0.39755499362945557, "learning_rate": 1.7888444690171397e-05, "loss": 0.0222, "step": 49650 }, { "epoch": 1.0568621613575468, "grad_norm": 0.7053915858268738, "learning_rate": 1.788631820694935e-05, "loss": 0.0237, "step": 49700 }, { "epoch": 1.0579254029685705, "grad_norm": 0.185197651386261, "learning_rate": 1.78841917237273e-05, "loss": 0.0198, "step": 49750 }, { "epoch": 1.0589886445795942, "grad_norm": 0.326768159866333, "learning_rate": 1.7882065240505255e-05, "loss": 0.0205, "step": 49800 }, { "epoch": 1.0589886445795942, "eval_f1": 0.6592173430069673, "eval_loss": 0.02150423638522625, "eval_precision": 0.8506947780290444, "eval_recall": 0.5380998944815211, "eval_runtime": 236.6213, "eval_samples_per_second": 397.471, "eval_steps_per_second": 12.425, "step": 49800 }, { "epoch": 1.0600518861906179, "grad_norm": 0.5214137434959412, "learning_rate": 1.7879938757283205e-05, "loss": 0.023, "step": 49850 }, { "epoch": 1.0611151278016417, "grad_norm": 0.370972216129303, "learning_rate": 1.787781227406116e-05, "loss": 0.0209, "step": 49900 }, { "epoch": 1.0621783694126654, "grad_norm": 0.27231454849243164, "learning_rate": 1.7875685790839112e-05, "loss": 0.0212, "step": 49950 }, { "epoch": 1.063241611023689, "grad_norm": 1.1229701042175293, "learning_rate": 1.7873559307617063e-05, "loss": 0.0177, "step": 50000 }, { "epoch": 1.063241611023689, "eval_f1": 0.6611954696352044, "eval_loss": 0.021698100492358208, "eval_precision": 0.8350854416239527, "eval_recall": 0.5472429852329114, "eval_runtime": 234.8918, "eval_samples_per_second": 400.397, "eval_steps_per_second": 12.516, "step": 50000 }, { "epoch": 1.0643048526347128, "grad_norm": 0.5960607528686523, "learning_rate": 1.7871432824395017e-05, "loss": 0.0193, "step": 50050 }, { "epoch": 1.0653680942457364, "grad_norm": 0.4463179409503937, "learning_rate": 1.786930634117297e-05, "loss": 0.0207, "step": 50100 }, { "epoch": 1.06643133585676, "grad_norm": 0.6027723550796509, "learning_rate": 1.7867179857950924e-05, "loss": 0.0229, "step": 50150 }, { "epoch": 1.0674945774677838, "grad_norm": 0.5614972114562988, "learning_rate": 1.7865053374728874e-05, "loss": 0.0225, "step": 50200 }, { "epoch": 1.0674945774677838, "eval_f1": 0.6602567298905838, "eval_loss": 0.02165035903453827, "eval_precision": 0.8479342137186885, "eval_recall": 0.5406025048201192, "eval_runtime": 234.8319, "eval_samples_per_second": 400.499, "eval_steps_per_second": 12.52, "step": 50200 }, { "epoch": 1.0685578190788074, "grad_norm": 0.36400240659713745, "learning_rate": 1.7862926891506828e-05, "loss": 0.0185, "step": 50250 }, { "epoch": 1.069621060689831, "grad_norm": 0.8248536586761475, "learning_rate": 1.786080040828478e-05, "loss": 0.0316, "step": 50300 }, { "epoch": 1.0706843023008548, "grad_norm": 0.3949924111366272, "learning_rate": 1.7858673925062732e-05, "loss": 0.0201, "step": 50350 }, { "epoch": 1.0717475439118784, "grad_norm": 0.3715271055698395, "learning_rate": 1.7856547441840686e-05, "loss": 0.0223, "step": 50400 }, { "epoch": 1.0717475439118784, "eval_f1": 0.6592513450104974, "eval_loss": 0.02131025679409504, "eval_precision": 0.8363852556479671, "eval_recall": 0.5440332355491907, "eval_runtime": 235.0571, "eval_samples_per_second": 400.115, "eval_steps_per_second": 12.508, "step": 50400 }, { "epoch": 1.0728107855229023, "grad_norm": 2.029324769973755, "learning_rate": 1.785442095861864e-05, "loss": 0.0226, "step": 50450 }, { "epoch": 1.073874027133926, "grad_norm": 0.0649937242269516, "learning_rate": 1.785229447539659e-05, "loss": 0.0196, "step": 50500 }, { "epoch": 1.0749372687449497, "grad_norm": 0.3925321102142334, "learning_rate": 1.7850167992174544e-05, "loss": 0.0216, "step": 50550 }, { "epoch": 1.0760005103559733, "grad_norm": 0.41144809126853943, "learning_rate": 1.7848041508952497e-05, "loss": 0.0213, "step": 50600 }, { "epoch": 1.0760005103559733, "eval_f1": 0.6631432534447749, "eval_loss": 0.021575015038251877, "eval_precision": 0.8364006459860679, "eval_recall": 0.5493478296898779, "eval_runtime": 234.5334, "eval_samples_per_second": 401.009, "eval_steps_per_second": 12.536, "step": 50600 }, { "epoch": 1.077063751966997, "grad_norm": 1.0000407695770264, "learning_rate": 1.7845915025730448e-05, "loss": 0.0172, "step": 50650 }, { "epoch": 1.0781269935780207, "grad_norm": 0.7520067095756531, "learning_rate": 1.78437885425084e-05, "loss": 0.0261, "step": 50700 }, { "epoch": 1.0791902351890443, "grad_norm": 0.5009575486183167, "learning_rate": 1.784166205928635e-05, "loss": 0.0181, "step": 50750 }, { "epoch": 1.080253476800068, "grad_norm": 0.8447592854499817, "learning_rate": 1.7839535576064305e-05, "loss": 0.0198, "step": 50800 }, { "epoch": 1.080253476800068, "eval_f1": 0.6642579646867341, "eval_loss": 0.021424219012260437, "eval_precision": 0.8432189341063137, "eval_recall": 0.5479611736303015, "eval_runtime": 234.6219, "eval_samples_per_second": 400.858, "eval_steps_per_second": 12.531, "step": 50800 }, { "epoch": 1.0813167184110917, "grad_norm": 0.44536396861076355, "learning_rate": 1.783740909284226e-05, "loss": 0.0235, "step": 50850 }, { "epoch": 1.0823799600221153, "grad_norm": 0.2528201639652252, "learning_rate": 1.7835282609620213e-05, "loss": 0.0219, "step": 50900 }, { "epoch": 1.083443201633139, "grad_norm": 0.17931537330150604, "learning_rate": 1.7833156126398163e-05, "loss": 0.0212, "step": 50950 }, { "epoch": 1.084506443244163, "grad_norm": 0.6151547431945801, "learning_rate": 1.7831029643176117e-05, "loss": 0.023, "step": 51000 }, { "epoch": 1.084506443244163, "eval_f1": 0.6578193807501512, "eval_loss": 0.021120019257068634, "eval_precision": 0.8491179323179421, "eval_recall": 0.5368679251536902, "eval_runtime": 235.2635, "eval_samples_per_second": 399.765, "eval_steps_per_second": 12.497, "step": 51000 }, { "epoch": 1.0855696848551866, "grad_norm": 0.30933907628059387, "learning_rate": 1.782890315995407e-05, "loss": 0.0221, "step": 51050 }, { "epoch": 1.0866329264662102, "grad_norm": 0.9422376155853271, "learning_rate": 1.7826776676732024e-05, "loss": 0.0192, "step": 51100 }, { "epoch": 1.087696168077234, "grad_norm": 0.5123322606086731, "learning_rate": 1.7824650193509975e-05, "loss": 0.0219, "step": 51150 }, { "epoch": 1.0887594096882576, "grad_norm": 0.67152339220047, "learning_rate": 1.782252371028793e-05, "loss": 0.0193, "step": 51200 }, { "epoch": 1.0887594096882576, "eval_f1": 0.663333811433417, "eval_loss": 0.021180568262934685, "eval_precision": 0.8499637405897463, "eval_recall": 0.5439061714481139, "eval_runtime": 235.3868, "eval_samples_per_second": 399.555, "eval_steps_per_second": 12.49, "step": 51200 }, { "epoch": 1.0898226512992812, "grad_norm": 0.4128761887550354, "learning_rate": 1.782039722706588e-05, "loss": 0.0187, "step": 51250 }, { "epoch": 1.090885892910305, "grad_norm": 3.7469699382781982, "learning_rate": 1.7818270743843832e-05, "loss": 0.0199, "step": 51300 }, { "epoch": 1.0919491345213286, "grad_norm": 1.232168197631836, "learning_rate": 1.7816144260621786e-05, "loss": 0.0225, "step": 51350 }, { "epoch": 1.0930123761323522, "grad_norm": 0.4364677965641022, "learning_rate": 1.7814017777399736e-05, "loss": 0.0207, "step": 51400 }, { "epoch": 1.0930123761323522, "eval_f1": 0.658915987736994, "eval_loss": 0.022359855473041534, "eval_precision": 0.8552748754573156, "eval_recall": 0.5358845595018791, "eval_runtime": 234.763, "eval_samples_per_second": 400.617, "eval_steps_per_second": 12.523, "step": 51400 }, { "epoch": 1.094075617743376, "grad_norm": 0.48993128538131714, "learning_rate": 1.781189129417769e-05, "loss": 0.0199, "step": 51450 }, { "epoch": 1.0951388593543996, "grad_norm": 0.43171802163124084, "learning_rate": 1.7809764810955644e-05, "loss": 0.0203, "step": 51500 }, { "epoch": 1.0962021009654235, "grad_norm": 0.24085168540477753, "learning_rate": 1.7807638327733598e-05, "loss": 0.0161, "step": 51550 }, { "epoch": 1.0972653425764471, "grad_norm": 0.3960121273994446, "learning_rate": 1.7805511844511548e-05, "loss": 0.0268, "step": 51600 }, { "epoch": 1.0972653425764471, "eval_f1": 0.6610368440180027, "eval_loss": 0.021860795095562935, "eval_precision": 0.830539072294122, "eval_recall": 0.5489942600173167, "eval_runtime": 235.1355, "eval_samples_per_second": 399.982, "eval_steps_per_second": 12.503, "step": 51600 }, { "epoch": 1.0983285841874708, "grad_norm": 0.42953887581825256, "learning_rate": 1.7803385361289502e-05, "loss": 0.0225, "step": 51650 }, { "epoch": 1.0993918257984945, "grad_norm": 0.7643927931785583, "learning_rate": 1.7801258878067452e-05, "loss": 0.0199, "step": 51700 }, { "epoch": 1.1004550674095182, "grad_norm": 0.1723305732011795, "learning_rate": 1.7799132394845406e-05, "loss": 0.0203, "step": 51750 }, { "epoch": 1.1015183090205418, "grad_norm": 0.41631704568862915, "learning_rate": 1.779700591162336e-05, "loss": 0.0208, "step": 51800 }, { "epoch": 1.1015183090205418, "eval_f1": 0.6626966888274458, "eval_loss": 0.02160533517599106, "eval_precision": 0.8350451396178178, "eval_recall": 0.5493202070592091, "eval_runtime": 234.5619, "eval_samples_per_second": 400.96, "eval_steps_per_second": 12.534, "step": 51800 }, { "epoch": 1.1025815506315655, "grad_norm": 1.3292227983474731, "learning_rate": 1.779487942840131e-05, "loss": 0.0179, "step": 51850 }, { "epoch": 1.1036447922425892, "grad_norm": 0.5455632209777832, "learning_rate": 1.7792752945179264e-05, "loss": 0.0167, "step": 51900 }, { "epoch": 1.1047080338536128, "grad_norm": 0.39727479219436646, "learning_rate": 1.7790626461957217e-05, "loss": 0.0218, "step": 51950 }, { "epoch": 1.1057712754646365, "grad_norm": 0.3236803412437439, "learning_rate": 1.778849997873517e-05, "loss": 0.0205, "step": 52000 }, { "epoch": 1.1057712754646365, "eval_f1": 0.6650610459306558, "eval_loss": 0.023634687066078186, "eval_precision": 0.8238631261886403, "eval_recall": 0.5575848981553299, "eval_runtime": 234.6011, "eval_samples_per_second": 400.893, "eval_steps_per_second": 12.532, "step": 52000 }, { "epoch": 1.1068345170756602, "grad_norm": 0.5126294493675232, "learning_rate": 1.778637349551312e-05, "loss": 0.0187, "step": 52050 }, { "epoch": 1.107897758686684, "grad_norm": 0.41331690549850464, "learning_rate": 1.7784247012291075e-05, "loss": 0.022, "step": 52100 }, { "epoch": 1.1089610002977077, "grad_norm": 0.46760207414627075, "learning_rate": 1.7782120529069025e-05, "loss": 0.0192, "step": 52150 }, { "epoch": 1.1100242419087314, "grad_norm": 0.5185506343841553, "learning_rate": 1.777999404584698e-05, "loss": 0.0196, "step": 52200 }, { "epoch": 1.1100242419087314, "eval_f1": 0.6414305153237628, "eval_loss": 0.022020339965820312, "eval_precision": 0.8664235154773064, "eval_recall": 0.5092010982757672, "eval_runtime": 235.0587, "eval_samples_per_second": 400.113, "eval_steps_per_second": 12.508, "step": 52200 }, { "epoch": 1.111087483519755, "grad_norm": 0.2362777143716812, "learning_rate": 1.7777867562624933e-05, "loss": 0.0216, "step": 52250 }, { "epoch": 1.1121507251307787, "grad_norm": 0.44728365540504456, "learning_rate": 1.7775741079402883e-05, "loss": 0.0208, "step": 52300 }, { "epoch": 1.1132139667418024, "grad_norm": 0.42123034596443176, "learning_rate": 1.7773614596180837e-05, "loss": 0.0218, "step": 52350 }, { "epoch": 1.114277208352826, "grad_norm": 0.5050871968269348, "learning_rate": 1.777148811295879e-05, "loss": 0.0186, "step": 52400 }, { "epoch": 1.114277208352826, "eval_f1": 0.6543120564500853, "eval_loss": 0.022381572052836418, "eval_precision": 0.8615738651745455, "eval_recall": 0.5274320345172101, "eval_runtime": 235.0532, "eval_samples_per_second": 400.122, "eval_steps_per_second": 12.508, "step": 52400 }, { "epoch": 1.1153404499638497, "grad_norm": 0.6085136532783508, "learning_rate": 1.7769361629736744e-05, "loss": 0.0196, "step": 52450 }, { "epoch": 1.1164036915748734, "grad_norm": 0.96410071849823, "learning_rate": 1.7767235146514698e-05, "loss": 0.0214, "step": 52500 }, { "epoch": 1.117466933185897, "grad_norm": 0.7370387315750122, "learning_rate": 1.776510866329265e-05, "loss": 0.0227, "step": 52550 }, { "epoch": 1.1185301747969207, "grad_norm": 0.5431768894195557, "learning_rate": 1.7762982180070602e-05, "loss": 0.0223, "step": 52600 }, { "epoch": 1.1185301747969207, "eval_f1": 0.6522931125158552, "eval_loss": 0.02165212109684944, "eval_precision": 0.8453873013359328, "eval_recall": 0.5310064029257596, "eval_runtime": 234.8876, "eval_samples_per_second": 400.404, "eval_steps_per_second": 12.517, "step": 52600 }, { "epoch": 1.1195934164079446, "grad_norm": 0.11016730964183807, "learning_rate": 1.7760855696848552e-05, "loss": 0.0188, "step": 52650 }, { "epoch": 1.1206566580189683, "grad_norm": 1.7227638959884644, "learning_rate": 1.7758729213626506e-05, "loss": 0.0189, "step": 52700 }, { "epoch": 1.121719899629992, "grad_norm": 0.25863170623779297, "learning_rate": 1.7756602730404456e-05, "loss": 0.0146, "step": 52750 }, { "epoch": 1.1227831412410156, "grad_norm": 1.0734578371047974, "learning_rate": 1.775447624718241e-05, "loss": 0.0254, "step": 52800 }, { "epoch": 1.1227831412410156, "eval_f1": 0.6599239928536038, "eval_loss": 0.022657308727502823, "eval_precision": 0.8096718230446075, "eval_recall": 0.5569219550192774, "eval_runtime": 235.0406, "eval_samples_per_second": 400.144, "eval_steps_per_second": 12.508, "step": 52800 }, { "epoch": 1.1238463828520393, "grad_norm": 0.5297976732254028, "learning_rate": 1.7752349763960364e-05, "loss": 0.0211, "step": 52850 }, { "epoch": 1.124909624463063, "grad_norm": 0.44575732946395874, "learning_rate": 1.7750223280738318e-05, "loss": 0.0187, "step": 52900 }, { "epoch": 1.1259728660740866, "grad_norm": 0.22216638922691345, "learning_rate": 1.774809679751627e-05, "loss": 0.0194, "step": 52950 }, { "epoch": 1.1270361076851103, "grad_norm": 0.5638076066970825, "learning_rate": 1.774597031429422e-05, "loss": 0.0182, "step": 53000 }, { "epoch": 1.1270361076851103, "eval_f1": 0.6594857654791241, "eval_loss": 0.02195655182003975, "eval_precision": 0.8350689153035292, "eval_recall": 0.5449116352044602, "eval_runtime": 234.7203, "eval_samples_per_second": 400.69, "eval_steps_per_second": 12.526, "step": 53000 }, { "epoch": 1.128099349296134, "grad_norm": 0.8454571962356567, "learning_rate": 1.7743843831072175e-05, "loss": 0.0214, "step": 53050 }, { "epoch": 1.1291625909071576, "grad_norm": 0.6448068022727966, "learning_rate": 1.7741717347850126e-05, "loss": 0.0209, "step": 53100 }, { "epoch": 1.1302258325181813, "grad_norm": 2.1358530521392822, "learning_rate": 1.773959086462808e-05, "loss": 0.0196, "step": 53150 }, { "epoch": 1.1312890741292052, "grad_norm": 1.1539809703826904, "learning_rate": 1.7737464381406033e-05, "loss": 0.0223, "step": 53200 }, { "epoch": 1.1312890741292052, "eval_f1": 0.6584515322915114, "eval_loss": 0.02110648714005947, "eval_precision": 0.8476689256787173, "eval_recall": 0.538293252896203, "eval_runtime": 234.6961, "eval_samples_per_second": 400.731, "eval_steps_per_second": 12.527, "step": 53200 }, { "epoch": 1.1323523157402289, "grad_norm": 0.37447842955589294, "learning_rate": 1.7735337898183983e-05, "loss": 0.0253, "step": 53250 }, { "epoch": 1.1334155573512525, "grad_norm": 1.065894365310669, "learning_rate": 1.7733211414961937e-05, "loss": 0.0159, "step": 53300 }, { "epoch": 1.1344787989622762, "grad_norm": 0.5399078130722046, "learning_rate": 1.773108493173989e-05, "loss": 0.022, "step": 53350 }, { "epoch": 1.1355420405732999, "grad_norm": 0.7950348854064941, "learning_rate": 1.7728958448517845e-05, "loss": 0.0187, "step": 53400 }, { "epoch": 1.1355420405732999, "eval_f1": 0.6616553282906427, "eval_loss": 0.022441396489739418, "eval_precision": 0.8285871028875677, "eval_recall": 0.5507068631187855, "eval_runtime": 235.0335, "eval_samples_per_second": 400.156, "eval_steps_per_second": 12.509, "step": 53400 }, { "epoch": 1.1366052821843236, "grad_norm": 0.7053166031837463, "learning_rate": 1.7726831965295795e-05, "loss": 0.0204, "step": 53450 }, { "epoch": 1.1376685237953472, "grad_norm": 0.7168750166893005, "learning_rate": 1.772470548207375e-05, "loss": 0.0214, "step": 53500 }, { "epoch": 1.1387317654063709, "grad_norm": 0.3659726679325104, "learning_rate": 1.77225789988517e-05, "loss": 0.0214, "step": 53550 }, { "epoch": 1.1397950070173946, "grad_norm": 0.34546953439712524, "learning_rate": 1.7720452515629653e-05, "loss": 0.0284, "step": 53600 }, { "epoch": 1.1397950070173946, "eval_f1": 0.6618471567775805, "eval_loss": 0.020964013412594795, "eval_precision": 0.8523896578740444, "eval_recall": 0.5409284518620117, "eval_runtime": 234.5163, "eval_samples_per_second": 401.038, "eval_steps_per_second": 12.536, "step": 53600 }, { "epoch": 1.1408582486284182, "grad_norm": 0.4466497004032135, "learning_rate": 1.7718326032407606e-05, "loss": 0.0186, "step": 53650 }, { "epoch": 1.141921490239442, "grad_norm": 0.39319100975990295, "learning_rate": 1.7716199549185557e-05, "loss": 0.0229, "step": 53700 }, { "epoch": 1.1429847318504658, "grad_norm": 0.26080650091171265, "learning_rate": 1.771407306596351e-05, "loss": 0.0213, "step": 53750 }, { "epoch": 1.1440479734614895, "grad_norm": 0.4122430980205536, "learning_rate": 1.7711946582741464e-05, "loss": 0.02, "step": 53800 }, { "epoch": 1.1440479734614895, "eval_f1": 0.6621877247710686, "eval_loss": 0.021086478605866432, "eval_precision": 0.8547611974205429, "eval_recall": 0.5404312445099723, "eval_runtime": 235.0849, "eval_samples_per_second": 400.068, "eval_steps_per_second": 12.506, "step": 53800 }, { "epoch": 1.1451112150725131, "grad_norm": 1.3126413822174072, "learning_rate": 1.7709820099519418e-05, "loss": 0.0174, "step": 53850 }, { "epoch": 1.1461744566835368, "grad_norm": 0.2849137485027313, "learning_rate": 1.7707693616297368e-05, "loss": 0.0216, "step": 53900 }, { "epoch": 1.1472376982945605, "grad_norm": 1.7430180311203003, "learning_rate": 1.7705567133075322e-05, "loss": 0.0201, "step": 53950 }, { "epoch": 1.1483009399055841, "grad_norm": 0.15251776576042175, "learning_rate": 1.7703440649853272e-05, "loss": 0.0196, "step": 54000 }, { "epoch": 1.1483009399055841, "eval_f1": 0.6655607140781157, "eval_loss": 0.021141836419701576, "eval_precision": 0.8381913679687846, "eval_recall": 0.5518946362375462, "eval_runtime": 234.8797, "eval_samples_per_second": 400.418, "eval_steps_per_second": 12.517, "step": 54000 }, { "epoch": 1.1493641815166078, "grad_norm": 0.398562490940094, "learning_rate": 1.7701314166631226e-05, "loss": 0.0205, "step": 54050 }, { "epoch": 1.1504274231276315, "grad_norm": 0.563997745513916, "learning_rate": 1.769918768340918e-05, "loss": 0.0185, "step": 54100 }, { "epoch": 1.1514906647386551, "grad_norm": 0.5947883129119873, "learning_rate": 1.769706120018713e-05, "loss": 0.0209, "step": 54150 }, { "epoch": 1.152553906349679, "grad_norm": 0.40853312611579895, "learning_rate": 1.7694934716965084e-05, "loss": 0.0229, "step": 54200 }, { "epoch": 1.152553906349679, "eval_f1": 0.6584075220423931, "eval_loss": 0.021042723208665848, "eval_precision": 0.8616474708935317, "eval_recall": 0.5327466286578973, "eval_runtime": 234.8165, "eval_samples_per_second": 400.526, "eval_steps_per_second": 12.52, "step": 54200 }, { "epoch": 1.1536171479607025, "grad_norm": 0.322333961725235, "learning_rate": 1.7692808233743038e-05, "loss": 0.0206, "step": 54250 }, { "epoch": 1.1546803895717264, "grad_norm": 0.6067876219749451, "learning_rate": 1.769068175052099e-05, "loss": 0.0235, "step": 54300 }, { "epoch": 1.15574363118275, "grad_norm": 0.3417598307132721, "learning_rate": 1.7688555267298945e-05, "loss": 0.0217, "step": 54350 }, { "epoch": 1.1568068727937737, "grad_norm": 0.23347382247447968, "learning_rate": 1.7686428784076895e-05, "loss": 0.0235, "step": 54400 }, { "epoch": 1.1568068727937737, "eval_f1": 0.6629474116458455, "eval_loss": 0.021094633266329765, "eval_precision": 0.843173541878025, "eval_recall": 0.5461988497936288, "eval_runtime": 234.8659, "eval_samples_per_second": 400.441, "eval_steps_per_second": 12.518, "step": 54400 }, { "epoch": 1.1578701144047974, "grad_norm": 0.4829035699367523, "learning_rate": 1.768430230085485e-05, "loss": 0.0188, "step": 54450 }, { "epoch": 1.158933356015821, "grad_norm": 0.1292632818222046, "learning_rate": 1.76821758176328e-05, "loss": 0.0205, "step": 54500 }, { "epoch": 1.1599965976268447, "grad_norm": 1.0731106996536255, "learning_rate": 1.7680049334410753e-05, "loss": 0.019, "step": 54550 }, { "epoch": 1.1610598392378684, "grad_norm": 0.40920788049697876, "learning_rate": 1.7677922851188703e-05, "loss": 0.022, "step": 54600 }, { "epoch": 1.1610598392378684, "eval_f1": 0.6608779240970988, "eval_loss": 0.020636312663555145, "eval_precision": 0.8355274824565884, "eval_recall": 0.5466187137797953, "eval_runtime": 234.8355, "eval_samples_per_second": 400.493, "eval_steps_per_second": 12.519, "step": 54600 }, { "epoch": 1.162123080848892, "grad_norm": 0.1903243362903595, "learning_rate": 1.7675796367966657e-05, "loss": 0.0236, "step": 54650 }, { "epoch": 1.1631863224599157, "grad_norm": 0.34380748867988586, "learning_rate": 1.767366988474461e-05, "loss": 0.0242, "step": 54700 }, { "epoch": 1.1642495640709396, "grad_norm": 1.1795967817306519, "learning_rate": 1.7671543401522565e-05, "loss": 0.0251, "step": 54750 }, { "epoch": 1.165312805681963, "grad_norm": 0.2930680811405182, "learning_rate": 1.766941691830052e-05, "loss": 0.0216, "step": 54800 }, { "epoch": 1.165312805681963, "eval_f1": 0.6577238445365392, "eval_loss": 0.02119259722530842, "eval_precision": 0.8608953676476391, "eval_recall": 0.5321389307831826, "eval_runtime": 235.3486, "eval_samples_per_second": 399.62, "eval_steps_per_second": 12.492, "step": 54800 }, { "epoch": 1.166376047292987, "grad_norm": 0.7375998497009277, "learning_rate": 1.766729043507847e-05, "loss": 0.021, "step": 54850 }, { "epoch": 1.1674392889040106, "grad_norm": 0.23287101089954376, "learning_rate": 1.7665163951856422e-05, "loss": 0.0248, "step": 54900 }, { "epoch": 1.1685025305150343, "grad_norm": 0.8529135584831238, "learning_rate": 1.7663037468634373e-05, "loss": 0.0213, "step": 54950 }, { "epoch": 1.169565772126058, "grad_norm": 0.2284819483757019, "learning_rate": 1.7660910985412326e-05, "loss": 0.0212, "step": 55000 }, { "epoch": 1.169565772126058, "eval_f1": 0.6618620030123117, "eval_loss": 0.02124536782503128, "eval_precision": 0.8501577560501309, "eval_recall": 0.5418510477263513, "eval_runtime": 234.7365, "eval_samples_per_second": 400.662, "eval_steps_per_second": 12.525, "step": 55000 }, { "epoch": 1.1706290137370816, "grad_norm": 0.5259132385253906, "learning_rate": 1.7658784502190277e-05, "loss": 0.0224, "step": 55050 }, { "epoch": 1.1716922553481053, "grad_norm": 0.8875043988227844, "learning_rate": 1.765665801896823e-05, "loss": 0.0203, "step": 55100 }, { "epoch": 1.172755496959129, "grad_norm": 0.21253390610218048, "learning_rate": 1.7654531535746184e-05, "loss": 0.0209, "step": 55150 }, { "epoch": 1.1738187385701526, "grad_norm": 0.26278412342071533, "learning_rate": 1.7652405052524138e-05, "loss": 0.0283, "step": 55200 }, { "epoch": 1.1738187385701526, "eval_f1": 0.6587861281975654, "eval_loss": 0.020789312198758125, "eval_precision": 0.866873399920594, "eval_recall": 0.5312605311279132, "eval_runtime": 234.6626, "eval_samples_per_second": 400.788, "eval_steps_per_second": 12.529, "step": 55200 }, { "epoch": 1.1748819801811763, "grad_norm": 0.4991321265697479, "learning_rate": 1.765027856930209e-05, "loss": 0.0184, "step": 55250 }, { "epoch": 1.1759452217922002, "grad_norm": 0.8330724239349365, "learning_rate": 1.7648152086080042e-05, "loss": 0.0206, "step": 55300 }, { "epoch": 1.1770084634032236, "grad_norm": 0.1390318125486374, "learning_rate": 1.7646025602857996e-05, "loss": 0.0206, "step": 55350 }, { "epoch": 1.1780717050142475, "grad_norm": 0.6524882316589355, "learning_rate": 1.7643899119635946e-05, "loss": 0.0217, "step": 55400 }, { "epoch": 1.1780717050142475, "eval_f1": 0.664787741174692, "eval_loss": 0.021278226748108864, "eval_precision": 0.8512175772626198, "eval_recall": 0.5453480727690281, "eval_runtime": 234.8869, "eval_samples_per_second": 400.406, "eval_steps_per_second": 12.517, "step": 55400 }, { "epoch": 1.1791349466252712, "grad_norm": 0.8060265183448792, "learning_rate": 1.76417726364139e-05, "loss": 0.0233, "step": 55450 }, { "epoch": 1.1801981882362949, "grad_norm": 0.6496391296386719, "learning_rate": 1.763964615319185e-05, "loss": 0.0232, "step": 55500 }, { "epoch": 1.1812614298473185, "grad_norm": 0.33469459414482117, "learning_rate": 1.7637519669969804e-05, "loss": 0.0161, "step": 55550 }, { "epoch": 1.1823246714583422, "grad_norm": 0.6842149496078491, "learning_rate": 1.7635393186747758e-05, "loss": 0.0194, "step": 55600 }, { "epoch": 1.1823246714583422, "eval_f1": 0.6572014433800702, "eval_loss": 0.02195766568183899, "eval_precision": 0.8593814138601079, "eval_recall": 0.532033964786641, "eval_runtime": 231.9735, "eval_samples_per_second": 405.434, "eval_steps_per_second": 12.674, "step": 55600 }, { "epoch": 1.1833879130693659, "grad_norm": 1.2873395681381226, "learning_rate": 1.763326670352571e-05, "loss": 0.0227, "step": 55650 }, { "epoch": 1.1844511546803895, "grad_norm": 0.9582759141921997, "learning_rate": 1.7631140220303665e-05, "loss": 0.0219, "step": 55700 }, { "epoch": 1.1855143962914132, "grad_norm": 0.09578666090965271, "learning_rate": 1.7629013737081615e-05, "loss": 0.0205, "step": 55750 }, { "epoch": 1.1865776379024369, "grad_norm": 0.833354115486145, "learning_rate": 1.762688725385957e-05, "loss": 0.0205, "step": 55800 }, { "epoch": 1.1865776379024369, "eval_f1": 0.6599448748019678, "eval_loss": 0.021162670105695724, "eval_precision": 0.8624793830516773, "eval_recall": 0.5344426581809649, "eval_runtime": 231.0569, "eval_samples_per_second": 407.043, "eval_steps_per_second": 12.724, "step": 55800 }, { "epoch": 1.1876408795134608, "grad_norm": 0.7451335787773132, "learning_rate": 1.7624760770637523e-05, "loss": 0.0204, "step": 55850 }, { "epoch": 1.1887041211244842, "grad_norm": 0.29400280117988586, "learning_rate": 1.7622634287415473e-05, "loss": 0.0201, "step": 55900 }, { "epoch": 1.189767362735508, "grad_norm": 0.7553412914276123, "learning_rate": 1.7620507804193427e-05, "loss": 0.0241, "step": 55950 }, { "epoch": 1.1908306043465318, "grad_norm": 0.2257997691631317, "learning_rate": 1.7618381320971377e-05, "loss": 0.0235, "step": 56000 }, { "epoch": 1.1908306043465318, "eval_f1": 0.6604426257369578, "eval_loss": 0.022786715999245644, "eval_precision": 0.8435734038715159, "eval_recall": 0.5426410549634805, "eval_runtime": 230.8769, "eval_samples_per_second": 407.36, "eval_steps_per_second": 12.734, "step": 56000 }, { "epoch": 1.1918938459575554, "grad_norm": 0.6447920203208923, "learning_rate": 1.761625483774933e-05, "loss": 0.0182, "step": 56050 }, { "epoch": 1.192957087568579, "grad_norm": 0.612124502658844, "learning_rate": 1.7614128354527285e-05, "loss": 0.0229, "step": 56100 }, { "epoch": 1.1940203291796028, "grad_norm": 0.32440224289894104, "learning_rate": 1.7612001871305238e-05, "loss": 0.0229, "step": 56150 }, { "epoch": 1.1950835707906264, "grad_norm": 0.6561402678489685, "learning_rate": 1.760987538808319e-05, "loss": 0.0189, "step": 56200 }, { "epoch": 1.1950835707906264, "eval_f1": 0.6639792600042665, "eval_loss": 0.021619217470288277, "eval_precision": 0.8514634947527834, "eval_recall": 0.5441602996502674, "eval_runtime": 230.9581, "eval_samples_per_second": 407.217, "eval_steps_per_second": 12.73, "step": 56200 }, { "epoch": 1.19614681240165, "grad_norm": 0.879652738571167, "learning_rate": 1.7607748904861142e-05, "loss": 0.0237, "step": 56250 }, { "epoch": 1.1972100540126738, "grad_norm": 0.5162805318832397, "learning_rate": 1.7605622421639096e-05, "loss": 0.0252, "step": 56300 }, { "epoch": 1.1982732956236974, "grad_norm": 1.154247760772705, "learning_rate": 1.7603495938417046e-05, "loss": 0.0189, "step": 56350 }, { "epoch": 1.1993365372347213, "grad_norm": 1.0627716779708862, "learning_rate": 1.7601369455195e-05, "loss": 0.0194, "step": 56400 }, { "epoch": 1.1993365372347213, "eval_f1": 0.66597392369032, "eval_loss": 0.02178182639181614, "eval_precision": 0.8467380371967025, "eval_recall": 0.5488119506549022, "eval_runtime": 231.0775, "eval_samples_per_second": 407.006, "eval_steps_per_second": 12.723, "step": 56400 }, { "epoch": 1.200399778845745, "grad_norm": 0.4244441092014313, "learning_rate": 1.759924297197295e-05, "loss": 0.0228, "step": 56450 }, { "epoch": 1.2014630204567687, "grad_norm": 1.2596505880355835, "learning_rate": 1.7597116488750904e-05, "loss": 0.0203, "step": 56500 }, { "epoch": 1.2025262620677923, "grad_norm": 0.3071063458919525, "learning_rate": 1.7594990005528858e-05, "loss": 0.0178, "step": 56550 }, { "epoch": 1.203589503678816, "grad_norm": 0.8078606128692627, "learning_rate": 1.759286352230681e-05, "loss": 0.0214, "step": 56600 }, { "epoch": 1.203589503678816, "eval_f1": 0.6652893058489888, "eval_loss": 0.022169381380081177, "eval_precision": 0.8242983938792001, "eval_recall": 0.5577064377302728, "eval_runtime": 231.2628, "eval_samples_per_second": 406.68, "eval_steps_per_second": 12.713, "step": 56600 }, { "epoch": 1.2046527452898397, "grad_norm": 0.30787208676338196, "learning_rate": 1.7590737039084762e-05, "loss": 0.0195, "step": 56650 }, { "epoch": 1.2057159869008633, "grad_norm": 1.2475857734680176, "learning_rate": 1.7588610555862716e-05, "loss": 0.022, "step": 56700 }, { "epoch": 1.206779228511887, "grad_norm": 0.6126956343650818, "learning_rate": 1.758648407264067e-05, "loss": 0.0192, "step": 56750 }, { "epoch": 1.2078424701229107, "grad_norm": 0.6534249186515808, "learning_rate": 1.758435758941862e-05, "loss": 0.0176, "step": 56800 }, { "epoch": 1.2078424701229107, "eval_f1": 0.6645213038805238, "eval_loss": 0.021836601197719574, "eval_precision": 0.8369331722215926, "eval_recall": 0.5510107120561429, "eval_runtime": 230.9634, "eval_samples_per_second": 407.207, "eval_steps_per_second": 12.729, "step": 56800 }, { "epoch": 1.2089057117339344, "grad_norm": 0.2803933024406433, "learning_rate": 1.7582231106196573e-05, "loss": 0.0233, "step": 56850 }, { "epoch": 1.209968953344958, "grad_norm": 0.5043376088142395, "learning_rate": 1.7580104622974524e-05, "loss": 0.0252, "step": 56900 }, { "epoch": 1.211032194955982, "grad_norm": 0.3797815144062042, "learning_rate": 1.7577978139752477e-05, "loss": 0.0139, "step": 56950 }, { "epoch": 1.2120954365670056, "grad_norm": 0.4761132299900055, "learning_rate": 1.757585165653043e-05, "loss": 0.0192, "step": 57000 }, { "epoch": 1.2120954365670056, "eval_f1": 0.658665410873486, "eval_loss": 0.021278681233525276, "eval_precision": 0.8464641382420586, "eval_recall": 0.539066686554931, "eval_runtime": 230.9351, "eval_samples_per_second": 407.257, "eval_steps_per_second": 12.731, "step": 57000 }, { "epoch": 1.2131586781780292, "grad_norm": 0.26769161224365234, "learning_rate": 1.7573725173308385e-05, "loss": 0.0194, "step": 57050 }, { "epoch": 1.214221919789053, "grad_norm": 0.7384342551231384, "learning_rate": 1.7571598690086335e-05, "loss": 0.0177, "step": 57100 }, { "epoch": 1.2152851614000766, "grad_norm": 0.9781543016433716, "learning_rate": 1.756947220686429e-05, "loss": 0.0197, "step": 57150 }, { "epoch": 1.2163484030111003, "grad_norm": 0.2294297218322754, "learning_rate": 1.7567345723642243e-05, "loss": 0.0205, "step": 57200 }, { "epoch": 1.2163484030111003, "eval_f1": 0.6557704299181105, "eval_loss": 0.02084539458155632, "eval_precision": 0.8462149793892925, "eval_recall": 0.5352989597316994, "eval_runtime": 232.5722, "eval_samples_per_second": 404.391, "eval_steps_per_second": 12.641, "step": 57200 }, { "epoch": 1.217411644622124, "grad_norm": 0.20063884556293488, "learning_rate": 1.7565219240420196e-05, "loss": 0.0192, "step": 57250 }, { "epoch": 1.2184748862331476, "grad_norm": 0.2791785001754761, "learning_rate": 1.7563092757198147e-05, "loss": 0.0222, "step": 57300 }, { "epoch": 1.2195381278441713, "grad_norm": 0.960075855255127, "learning_rate": 1.75609662739761e-05, "loss": 0.0211, "step": 57350 }, { "epoch": 1.220601369455195, "grad_norm": 0.3140982687473297, "learning_rate": 1.755883979075405e-05, "loss": 0.0229, "step": 57400 }, { "epoch": 1.220601369455195, "eval_f1": 0.6573019078477189, "eval_loss": 0.020868511870503426, "eval_precision": 0.8608092030521016, "eval_recall": 0.5316196253266082, "eval_runtime": 233.7933, "eval_samples_per_second": 402.278, "eval_steps_per_second": 12.575, "step": 57400 }, { "epoch": 1.2216646110662186, "grad_norm": 0.4005465507507324, "learning_rate": 1.7556713307532005e-05, "loss": 0.0168, "step": 57450 }, { "epoch": 1.2227278526772425, "grad_norm": 0.731907308101654, "learning_rate": 1.7554586824309958e-05, "loss": 0.0188, "step": 57500 }, { "epoch": 1.2237910942882662, "grad_norm": 0.49528008699417114, "learning_rate": 1.7552460341087912e-05, "loss": 0.0231, "step": 57550 }, { "epoch": 1.2248543358992898, "grad_norm": 0.35449784994125366, "learning_rate": 1.7550333857865862e-05, "loss": 0.0213, "step": 57600 }, { "epoch": 1.2248543358992898, "eval_f1": 0.6659822291600205, "eval_loss": 0.02089688368141651, "eval_precision": 0.8522813426148552, "eval_recall": 0.5465192723093875, "eval_runtime": 233.6059, "eval_samples_per_second": 402.601, "eval_steps_per_second": 12.585, "step": 57600 }, { "epoch": 1.2259175775103135, "grad_norm": 0.22064907848834991, "learning_rate": 1.7548207374643816e-05, "loss": 0.0166, "step": 57650 }, { "epoch": 1.2269808191213372, "grad_norm": 0.3918984532356262, "learning_rate": 1.754608089142177e-05, "loss": 0.023, "step": 57700 }, { "epoch": 1.2280440607323608, "grad_norm": 0.4748043119907379, "learning_rate": 1.754395440819972e-05, "loss": 0.0193, "step": 57750 }, { "epoch": 1.2291073023433845, "grad_norm": 5.7543439865112305, "learning_rate": 1.7541827924977674e-05, "loss": 0.024, "step": 57800 }, { "epoch": 1.2291073023433845, "eval_f1": 0.6578086292968257, "eval_loss": 0.020687982439994812, "eval_precision": 0.8473193371704489, "eval_recall": 0.5375750644988129, "eval_runtime": 232.5025, "eval_samples_per_second": 404.512, "eval_steps_per_second": 12.645, "step": 57800 }, { "epoch": 1.2301705439544082, "grad_norm": 1.1126455068588257, "learning_rate": 1.7539701441755624e-05, "loss": 0.0283, "step": 57850 }, { "epoch": 1.2312337855654318, "grad_norm": 0.20160041749477386, "learning_rate": 1.7537574958533578e-05, "loss": 0.0212, "step": 57900 }, { "epoch": 1.2322970271764555, "grad_norm": 0.18024417757987976, "learning_rate": 1.753544847531153e-05, "loss": 0.0199, "step": 57950 }, { "epoch": 1.2333602687874792, "grad_norm": 0.20240627229213715, "learning_rate": 1.7533321992089485e-05, "loss": 0.0205, "step": 58000 }, { "epoch": 1.2333602687874792, "eval_f1": 0.6591025901553514, "eval_loss": 0.02061622217297554, "eval_precision": 0.8683243165128959, "eval_recall": 0.5311279425007026, "eval_runtime": 230.8472, "eval_samples_per_second": 407.412, "eval_steps_per_second": 12.736, "step": 58000 }, { "epoch": 1.234423510398503, "grad_norm": 0.94939124584198, "learning_rate": 1.7531195508867436e-05, "loss": 0.0174, "step": 58050 }, { "epoch": 1.2354867520095267, "grad_norm": 0.743366539478302, "learning_rate": 1.752906902564539e-05, "loss": 0.0207, "step": 58100 }, { "epoch": 1.2365499936205504, "grad_norm": 2.387416124343872, "learning_rate": 1.7526942542423343e-05, "loss": 0.0241, "step": 58150 }, { "epoch": 1.237613235231574, "grad_norm": 1.070416808128357, "learning_rate": 1.7524816059201293e-05, "loss": 0.019, "step": 58200 }, { "epoch": 1.237613235231574, "eval_f1": 0.6650607381247626, "eval_loss": 0.021515367552638054, "eval_precision": 0.8485900700463105, "eval_recall": 0.5468010231422097, "eval_runtime": 230.9756, "eval_samples_per_second": 407.186, "eval_steps_per_second": 12.729, "step": 58200 }, { "epoch": 1.2386764768425977, "grad_norm": 0.9877146482467651, "learning_rate": 1.7522689575979247e-05, "loss": 0.0191, "step": 58250 }, { "epoch": 1.2397397184536214, "grad_norm": 0.5837464928627014, "learning_rate": 1.7520563092757197e-05, "loss": 0.0183, "step": 58300 }, { "epoch": 1.240802960064645, "grad_norm": 0.9360388517379761, "learning_rate": 1.751843660953515e-05, "loss": 0.0196, "step": 58350 }, { "epoch": 1.2418662016756687, "grad_norm": 0.5429843664169312, "learning_rate": 1.7516310126313105e-05, "loss": 0.0199, "step": 58400 }, { "epoch": 1.2418662016756687, "eval_f1": 0.6599850121522395, "eval_loss": 0.021126294508576393, "eval_precision": 0.8479488046616158, "eval_recall": 0.5402323615691565, "eval_runtime": 231.1061, "eval_samples_per_second": 406.956, "eval_steps_per_second": 12.721, "step": 58400 }, { "epoch": 1.2429294432866924, "grad_norm": 1.086408257484436, "learning_rate": 1.751418364309106e-05, "loss": 0.0214, "step": 58450 }, { "epoch": 1.243992684897716, "grad_norm": 0.4774109721183777, "learning_rate": 1.751205715986901e-05, "loss": 0.0218, "step": 58500 }, { "epoch": 1.2450559265087398, "grad_norm": 0.8047826886177063, "learning_rate": 1.7509930676646963e-05, "loss": 0.0204, "step": 58550 }, { "epoch": 1.2461191681197636, "grad_norm": 0.6038611531257629, "learning_rate": 1.7507804193424916e-05, "loss": 0.0212, "step": 58600 }, { "epoch": 1.2461191681197636, "eval_f1": 0.6637617572421523, "eval_loss": 0.02128068171441555, "eval_precision": 0.8458793935452236, "eval_recall": 0.5461712271629598, "eval_runtime": 232.8594, "eval_samples_per_second": 403.892, "eval_steps_per_second": 12.626, "step": 58600 }, { "epoch": 1.2471824097307873, "grad_norm": 1.1743375062942505, "learning_rate": 1.750567771020287e-05, "loss": 0.0245, "step": 58650 }, { "epoch": 1.248245651341811, "grad_norm": 0.372575968503952, "learning_rate": 1.750355122698082e-05, "loss": 0.0224, "step": 58700 }, { "epoch": 1.2493088929528346, "grad_norm": 0.237751305103302, "learning_rate": 1.7501424743758774e-05, "loss": 0.021, "step": 58750 }, { "epoch": 1.2503721345638583, "grad_norm": 0.7088329195976257, "learning_rate": 1.7499298260536724e-05, "loss": 0.0191, "step": 58800 }, { "epoch": 1.2503721345638583, "eval_f1": 0.6558654140002654, "eval_loss": 0.021009502932429314, "eval_precision": 0.8571479516782546, "eval_recall": 0.5311389915529702, "eval_runtime": 232.2407, "eval_samples_per_second": 404.968, "eval_steps_per_second": 12.659, "step": 58800 }, { "epoch": 1.251435376174882, "grad_norm": 1.177181601524353, "learning_rate": 1.7497171777314678e-05, "loss": 0.0203, "step": 58850 }, { "epoch": 1.2524986177859057, "grad_norm": 0.3520826995372772, "learning_rate": 1.7495045294092632e-05, "loss": 0.0294, "step": 58900 }, { "epoch": 1.2535618593969293, "grad_norm": 0.8577402234077454, "learning_rate": 1.7492918810870582e-05, "loss": 0.0187, "step": 58950 }, { "epoch": 1.254625101007953, "grad_norm": 1.045675277709961, "learning_rate": 1.7490792327648536e-05, "loss": 0.0215, "step": 59000 }, { "epoch": 1.254625101007953, "eval_f1": 0.6573114562843732, "eval_loss": 0.02082480862736702, "eval_precision": 0.8697226011822765, "eval_recall": 0.5282883360679446, "eval_runtime": 233.8318, "eval_samples_per_second": 402.212, "eval_steps_per_second": 12.573, "step": 59000 }, { "epoch": 1.2556883426189767, "grad_norm": 1.163398265838623, "learning_rate": 1.748866584442649e-05, "loss": 0.0223, "step": 59050 }, { "epoch": 1.2567515842300003, "grad_norm": 0.44529324769973755, "learning_rate": 1.7486539361204443e-05, "loss": 0.0191, "step": 59100 }, { "epoch": 1.2578148258410242, "grad_norm": 1.7444082498550415, "learning_rate": 1.7484412877982394e-05, "loss": 0.0207, "step": 59150 }, { "epoch": 1.2588780674520477, "grad_norm": 0.154902845621109, "learning_rate": 1.7482286394760347e-05, "loss": 0.0231, "step": 59200 }, { "epoch": 1.2588780674520477, "eval_f1": 0.6638119902336514, "eval_loss": 0.021000294014811516, "eval_precision": 0.8475449091516958, "eval_recall": 0.5455469557098438, "eval_runtime": 232.2814, "eval_samples_per_second": 404.897, "eval_steps_per_second": 12.657, "step": 59200 }, { "epoch": 1.2599413090630716, "grad_norm": 0.2750571668148041, "learning_rate": 1.7480159911538298e-05, "loss": 0.0163, "step": 59250 }, { "epoch": 1.2610045506740952, "grad_norm": 0.38755232095718384, "learning_rate": 1.747803342831625e-05, "loss": 0.0182, "step": 59300 }, { "epoch": 1.262067792285119, "grad_norm": 0.7863503694534302, "learning_rate": 1.7475906945094205e-05, "loss": 0.021, "step": 59350 }, { "epoch": 1.2631310338961426, "grad_norm": 0.5975663065910339, "learning_rate": 1.7473780461872156e-05, "loss": 0.021, "step": 59400 }, { "epoch": 1.2631310338961426, "eval_f1": 0.6664404682246796, "eval_loss": 0.021018147468566895, "eval_precision": 0.8502087101114392, "eval_recall": 0.5479943207871042, "eval_runtime": 230.9451, "eval_samples_per_second": 407.24, "eval_steps_per_second": 12.73, "step": 59400 }, { "epoch": 1.2641942755071662, "grad_norm": 0.29623734951019287, "learning_rate": 1.747165397865011e-05, "loss": 0.0223, "step": 59450 }, { "epoch": 1.26525751711819, "grad_norm": 0.7600260376930237, "learning_rate": 1.7469527495428063e-05, "loss": 0.0216, "step": 59500 }, { "epoch": 1.2663207587292136, "grad_norm": 0.5567991137504578, "learning_rate": 1.7467401012206017e-05, "loss": 0.0181, "step": 59550 }, { "epoch": 1.2673840003402372, "grad_norm": 0.25399553775787354, "learning_rate": 1.7465274528983967e-05, "loss": 0.0204, "step": 59600 }, { "epoch": 1.2673840003402372, "eval_f1": 0.6635881420073368, "eval_loss": 0.020989634096622467, "eval_precision": 0.8234109605860251, "eval_recall": 0.5557231328482493, "eval_runtime": 231.1026, "eval_samples_per_second": 406.962, "eval_steps_per_second": 12.722, "step": 59600 }, { "epoch": 1.268447241951261, "grad_norm": 0.3186938464641571, "learning_rate": 1.746314804576192e-05, "loss": 0.0231, "step": 59650 }, { "epoch": 1.2695104835622848, "grad_norm": 1.126382827758789, "learning_rate": 1.746102156253987e-05, "loss": 0.0181, "step": 59700 }, { "epoch": 1.2705737251733085, "grad_norm": 0.28928157687187195, "learning_rate": 1.7458895079317825e-05, "loss": 0.0186, "step": 59750 }, { "epoch": 1.2716369667843321, "grad_norm": 0.6107980608940125, "learning_rate": 1.745676859609578e-05, "loss": 0.0196, "step": 59800 }, { "epoch": 1.2716369667843321, "eval_f1": 0.6663020044625086, "eval_loss": 0.021735653281211853, "eval_precision": 0.8330100628304543, "eval_recall": 0.5551927783394073, "eval_runtime": 231.0959, "eval_samples_per_second": 406.974, "eval_steps_per_second": 12.722, "step": 59800 }, { "epoch": 1.2727002083953558, "grad_norm": 0.9550905227661133, "learning_rate": 1.745464211287373e-05, "loss": 0.0203, "step": 59850 }, { "epoch": 1.2737634500063795, "grad_norm": 0.59261554479599, "learning_rate": 1.7452515629651683e-05, "loss": 0.0166, "step": 59900 }, { "epoch": 1.2748266916174031, "grad_norm": 0.3394817113876343, "learning_rate": 1.7450389146429636e-05, "loss": 0.023, "step": 59950 }, { "epoch": 1.2758899332284268, "grad_norm": 1.3589529991149902, "learning_rate": 1.744826266320759e-05, "loss": 0.0199, "step": 60000 }, { "epoch": 1.2758899332284268, "eval_f1": 0.6619710711025019, "eval_loss": 0.021295299753546715, "eval_precision": 0.8412007664465965, "eval_recall": 0.5457016424415894, "eval_runtime": 230.8904, "eval_samples_per_second": 407.336, "eval_steps_per_second": 12.733, "step": 60000 }, { "epoch": 1.2769531748394505, "grad_norm": 0.4758854806423187, "learning_rate": 1.7446136179985544e-05, "loss": 0.0167, "step": 60050 }, { "epoch": 1.2780164164504741, "grad_norm": 0.6086984276771545, "learning_rate": 1.7444009696763494e-05, "loss": 0.0216, "step": 60100 }, { "epoch": 1.2790796580614978, "grad_norm": 0.14305813610553741, "learning_rate": 1.7441883213541448e-05, "loss": 0.0216, "step": 60150 }, { "epoch": 1.2801428996725215, "grad_norm": 0.19541509449481964, "learning_rate": 1.7439756730319398e-05, "loss": 0.018, "step": 60200 }, { "epoch": 1.2801428996725215, "eval_f1": 0.6679060933922908, "eval_loss": 0.021336298435926437, "eval_precision": 0.8301865108031105, "eval_recall": 0.5586953279082177, "eval_runtime": 231.1567, "eval_samples_per_second": 406.867, "eval_steps_per_second": 12.719, "step": 60200 }, { "epoch": 1.2812061412835454, "grad_norm": 0.7711753249168396, "learning_rate": 1.7437630247097352e-05, "loss": 0.0173, "step": 60250 }, { "epoch": 1.282269382894569, "grad_norm": 1.9953726530075073, "learning_rate": 1.7435503763875306e-05, "loss": 0.0222, "step": 60300 }, { "epoch": 1.2833326245055927, "grad_norm": 0.20585110783576965, "learning_rate": 1.7433377280653256e-05, "loss": 0.019, "step": 60350 }, { "epoch": 1.2843958661166164, "grad_norm": 0.6262738108634949, "learning_rate": 1.743125079743121e-05, "loss": 0.0198, "step": 60400 }, { "epoch": 1.2843958661166164, "eval_f1": 0.6621932546037779, "eval_loss": 0.02090124599635601, "eval_precision": 0.8585799910225527, "eval_recall": 0.538923048875453, "eval_runtime": 231.0765, "eval_samples_per_second": 407.008, "eval_steps_per_second": 12.723, "step": 60400 }, { "epoch": 1.28545910772764, "grad_norm": 0.39449721574783325, "learning_rate": 1.7429124314209163e-05, "loss": 0.0141, "step": 60450 }, { "epoch": 1.2865223493386637, "grad_norm": 0.2634122669696808, "learning_rate": 1.7426997830987117e-05, "loss": 0.0176, "step": 60500 }, { "epoch": 1.2875855909496874, "grad_norm": 0.5418798923492432, "learning_rate": 1.7424871347765067e-05, "loss": 0.0258, "step": 60550 }, { "epoch": 1.288648832560711, "grad_norm": 0.37381020188331604, "learning_rate": 1.742274486454302e-05, "loss": 0.0164, "step": 60600 }, { "epoch": 1.288648832560711, "eval_f1": 0.6638315181414081, "eval_loss": 0.0217717494815588, "eval_precision": 0.8406750668969244, "eval_recall": 0.5484583809823409, "eval_runtime": 231.1378, "eval_samples_per_second": 406.9, "eval_steps_per_second": 12.72, "step": 60600 }, { "epoch": 1.2897120741717347, "grad_norm": 0.5200266242027283, "learning_rate": 1.742061838132097e-05, "loss": 0.0248, "step": 60650 }, { "epoch": 1.2907753157827584, "grad_norm": 0.5575728416442871, "learning_rate": 1.7418491898098925e-05, "loss": 0.0211, "step": 60700 }, { "epoch": 1.291838557393782, "grad_norm": 0.4011172354221344, "learning_rate": 1.741636541487688e-05, "loss": 0.0212, "step": 60750 }, { "epoch": 1.292901799004806, "grad_norm": 0.34959661960601807, "learning_rate": 1.741423893165483e-05, "loss": 0.0209, "step": 60800 }, { "epoch": 1.292901799004806, "eval_f1": 0.6646807565083701, "eval_loss": 0.021642576903104782, "eval_precision": 0.8235764855749321, "eval_recall": 0.5571816077475646, "eval_runtime": 230.8913, "eval_samples_per_second": 407.335, "eval_steps_per_second": 12.733, "step": 60800 }, { "epoch": 1.2939650406158296, "grad_norm": 0.4573071300983429, "learning_rate": 1.7412112448432783e-05, "loss": 0.0209, "step": 60850 }, { "epoch": 1.2950282822268533, "grad_norm": 1.239029049873352, "learning_rate": 1.7409985965210737e-05, "loss": 0.0238, "step": 60900 }, { "epoch": 1.296091523837877, "grad_norm": 0.2835051119327545, "learning_rate": 1.740785948198869e-05, "loss": 0.0176, "step": 60950 }, { "epoch": 1.2971547654489006, "grad_norm": 0.35293272137641907, "learning_rate": 1.740573299876664e-05, "loss": 0.0193, "step": 61000 }, { "epoch": 1.2971547654489006, "eval_f1": 0.6646905630191198, "eval_loss": 0.02121865563094616, "eval_precision": 0.8176208184435522, "eval_recall": 0.5599549198667174, "eval_runtime": 230.9637, "eval_samples_per_second": 407.207, "eval_steps_per_second": 12.729, "step": 61000 }, { "epoch": 1.2982180070599243, "grad_norm": 0.42994552850723267, "learning_rate": 1.7403606515544594e-05, "loss": 0.024, "step": 61050 }, { "epoch": 1.299281248670948, "grad_norm": 0.41127943992614746, "learning_rate": 1.7401480032322545e-05, "loss": 0.0164, "step": 61100 }, { "epoch": 1.3003444902819716, "grad_norm": 0.7554137110710144, "learning_rate": 1.73993535491005e-05, "loss": 0.0209, "step": 61150 }, { "epoch": 1.3014077318929953, "grad_norm": 0.6362655758857727, "learning_rate": 1.7397227065878452e-05, "loss": 0.0198, "step": 61200 }, { "epoch": 1.3014077318929953, "eval_f1": 0.6651102225672628, "eval_loss": 0.022199010476469994, "eval_precision": 0.8428561740220712, "eval_recall": 0.549276010850139, "eval_runtime": 231.1058, "eval_samples_per_second": 406.956, "eval_steps_per_second": 12.721, "step": 61200 }, { "epoch": 1.3024709735040192, "grad_norm": 0.4933357536792755, "learning_rate": 1.7395100582656403e-05, "loss": 0.0171, "step": 61250 }, { "epoch": 1.3035342151150426, "grad_norm": 0.30413687229156494, "learning_rate": 1.7392974099434356e-05, "loss": 0.022, "step": 61300 }, { "epoch": 1.3045974567260665, "grad_norm": 0.5841660499572754, "learning_rate": 1.739084761621231e-05, "loss": 0.021, "step": 61350 }, { "epoch": 1.3056606983370902, "grad_norm": 0.22698457539081573, "learning_rate": 1.7388721132990264e-05, "loss": 0.0208, "step": 61400 }, { "epoch": 1.3056606983370902, "eval_f1": 0.6618279015279027, "eval_loss": 0.020812716335058212, "eval_precision": 0.8477441176216082, "eval_recall": 0.5427902171690923, "eval_runtime": 231.0647, "eval_samples_per_second": 407.029, "eval_steps_per_second": 12.724, "step": 61400 }, { "epoch": 1.3067239399481139, "grad_norm": 0.7591254115104675, "learning_rate": 1.7386594649768214e-05, "loss": 0.0208, "step": 61450 }, { "epoch": 1.3077871815591375, "grad_norm": 0.5949316024780273, "learning_rate": 1.7384468166546168e-05, "loss": 0.0192, "step": 61500 }, { "epoch": 1.3088504231701612, "grad_norm": 0.2152254730463028, "learning_rate": 1.738234168332412e-05, "loss": 0.019, "step": 61550 }, { "epoch": 1.3099136647811849, "grad_norm": 0.18432724475860596, "learning_rate": 1.7380215200102072e-05, "loss": 0.0174, "step": 61600 }, { "epoch": 1.3099136647811849, "eval_f1": 0.6573181552138689, "eval_loss": 0.021131472662091255, "eval_precision": 0.8604451595601269, "eval_recall": 0.5317798365844876, "eval_runtime": 231.3828, "eval_samples_per_second": 406.469, "eval_steps_per_second": 12.706, "step": 61600 } ], "logging_steps": 50, "max_steps": 470260, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.442078540161113e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }