{
  "best_global_step": 10000,
  "best_metric": 0.8816914583342934,
  "best_model_checkpoint": "./checkpoint-10000",
  "epoch": 2.844201095072175,
  "eval_steps": 1000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00028443433122377873,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 6.8942,
      "step": 1
    },
    {
      "epoch": 0.028443433122377872,
      "grad_norm": 10.016256332397461,
      "learning_rate": 1.8767772511848342e-06,
      "loss": 5.0217,
      "step": 100
    },
    {
      "epoch": 0.056886866244755745,
      "grad_norm": 29.21646499633789,
      "learning_rate": 3.772511848341233e-06,
      "loss": 3.3137,
      "step": 200
    },
    {
      "epoch": 0.08533029936713361,
      "grad_norm": 33.183448791503906,
      "learning_rate": 5.66824644549763e-06,
      "loss": 2.5012,
      "step": 300
    },
    {
      "epoch": 0.11377373248951149,
      "grad_norm": 35.1544303894043,
      "learning_rate": 7.563981042654029e-06,
      "loss": 2.0729,
      "step": 400
    },
    {
      "epoch": 0.14221716561188935,
      "grad_norm": 40.57954406738281,
      "learning_rate": 9.459715639810427e-06,
      "loss": 1.8169,
      "step": 500
    },
    {
      "epoch": 0.17066059873426723,
      "grad_norm": 28.46094512939453,
      "learning_rate": 1.1355450236966825e-05,
      "loss": 1.7012,
      "step": 600
    },
    {
      "epoch": 0.1991040318566451,
      "grad_norm": 18.60147476196289,
      "learning_rate": 1.3251184834123222e-05,
      "loss": 1.5425,
      "step": 700
    },
    {
      "epoch": 0.22754746497902298,
      "grad_norm": 32.00541687011719,
      "learning_rate": 1.5146919431279623e-05,
      "loss": 1.4402,
      "step": 800
    },
    {
      "epoch": 0.25599089810140085,
      "grad_norm": 18.643096923828125,
      "learning_rate": 1.704265402843602e-05,
      "loss": 1.38,
      "step": 900
    },
    {
      "epoch": 0.2844343312237787,
      "grad_norm": 19.019792556762695,
      "learning_rate": 1.8938388625592418e-05,
      "loss": 1.3005,
      "step": 1000
    },
    {
      "epoch": 0.2844343312237787,
      "eval_f1_macro": 0.7974631141388782,
      "eval_loss": 0.3369969427585602,
      "eval_runtime": 407.0416,
      "eval_samples_per_second": 491.35,
      "eval_steps_per_second": 3.84,
      "step": 1000
    },
    {
      "epoch": 0.3128777643461566,
      "grad_norm": 15.35057544708252,
      "learning_rate": 1.9907300115874858e-05,
      "loss": 1.3325,
      "step": 1100
    },
    {
      "epoch": 0.34132119746853445,
      "grad_norm": 7.621375560760498,
      "learning_rate": 1.969661856104498e-05,
      "loss": 1.2425,
      "step": 1200
    },
    {
      "epoch": 0.3697646305909123,
      "grad_norm": 28.559072494506836,
      "learning_rate": 1.9485937006215108e-05,
      "loss": 1.2189,
      "step": 1300
    },
    {
      "epoch": 0.3982080637132902,
      "grad_norm": 11.210355758666992,
      "learning_rate": 1.927525545138523e-05,
      "loss": 1.166,
      "step": 1400
    },
    {
      "epoch": 0.42665149683566805,
      "grad_norm": 16.360652923583984,
      "learning_rate": 1.906457389655536e-05,
      "loss": 1.1281,
      "step": 1500
    },
    {
      "epoch": 0.45509492995804596,
      "grad_norm": 17.10007095336914,
      "learning_rate": 1.8853892341725482e-05,
      "loss": 1.1306,
      "step": 1600
    },
    {
      "epoch": 0.4835383630804238,
      "grad_norm": 11.677529335021973,
      "learning_rate": 1.864321078689561e-05,
      "loss": 1.1031,
      "step": 1700
    },
    {
      "epoch": 0.5119817962028017,
      "grad_norm": 9.072050094604492,
      "learning_rate": 1.8432529232065736e-05,
      "loss": 1.0715,
      "step": 1800
    },
    {
      "epoch": 0.5404252293251796,
      "grad_norm": 11.437026023864746,
      "learning_rate": 1.822184767723586e-05,
      "loss": 1.0483,
      "step": 1900
    },
    {
      "epoch": 0.5688686624475574,
      "grad_norm": 7.232166290283203,
      "learning_rate": 1.8011166122405986e-05,
      "loss": 1.0246,
      "step": 2000
    },
    {
      "epoch": 0.5688686624475574,
      "eval_f1_macro": 0.8172822685871202,
      "eval_loss": 0.26557663083076477,
      "eval_runtime": 404.6294,
      "eval_samples_per_second": 494.279,
      "eval_steps_per_second": 3.863,
      "step": 2000
    },
    {
      "epoch": 0.5973120955699353,
      "grad_norm": 11.33175277709961,
      "learning_rate": 1.780048456757611e-05,
      "loss": 1.0286,
      "step": 2100
    },
    {
      "epoch": 0.6257555286923132,
      "grad_norm": 10.749075889587402,
      "learning_rate": 1.7589803012746237e-05,
      "loss": 1.0027,
      "step": 2200
    },
    {
      "epoch": 0.6541989618146911,
      "grad_norm": 17.69948387145996,
      "learning_rate": 1.737912145791636e-05,
      "loss": 0.9903,
      "step": 2300
    },
    {
      "epoch": 0.6826423949370689,
      "grad_norm": 9.61713695526123,
      "learning_rate": 1.7168439903086487e-05,
      "loss": 0.9816,
      "step": 2400
    },
    {
      "epoch": 0.7110858280594468,
      "grad_norm": 10.537064552307129,
      "learning_rate": 1.695775834825661e-05,
      "loss": 0.9797,
      "step": 2500
    },
    {
      "epoch": 0.7395292611818246,
      "grad_norm": 10.092955589294434,
      "learning_rate": 1.6747076793426738e-05,
      "loss": 0.9582,
      "step": 2600
    },
    {
      "epoch": 0.7679726943042026,
      "grad_norm": 9.091156959533691,
      "learning_rate": 1.653639523859686e-05,
      "loss": 0.9423,
      "step": 2700
    },
    {
      "epoch": 0.7964161274265804,
      "grad_norm": 7.700159549713135,
      "learning_rate": 1.632571368376699e-05,
      "loss": 0.9291,
      "step": 2800
    },
    {
      "epoch": 0.8248595605489583,
      "grad_norm": 8.272307395935059,
      "learning_rate": 1.6115032128937115e-05,
      "loss": 0.9524,
      "step": 2900
    },
    {
      "epoch": 0.8533029936713361,
      "grad_norm": 11.422605514526367,
      "learning_rate": 1.590435057410724e-05,
      "loss": 0.9236,
      "step": 3000
    },
    {
      "epoch": 0.8533029936713361,
      "eval_f1_macro": 0.85234873705105,
      "eval_loss": 0.23829074203968048,
      "eval_runtime": 404.6948,
      "eval_samples_per_second": 494.2,
      "eval_steps_per_second": 3.862,
      "step": 3000
    },
    {
      "epoch": 0.881746426793714,
      "grad_norm": 8.847649574279785,
      "learning_rate": 1.5693669019277366e-05,
      "loss": 0.9048,
      "step": 3100
    },
    {
      "epoch": 0.9101898599160919,
      "grad_norm": 10.125356674194336,
      "learning_rate": 1.548298746444749e-05,
      "loss": 0.9432,
      "step": 3200
    },
    {
      "epoch": 0.9386332930384698,
      "grad_norm": 10.186013221740723,
      "learning_rate": 1.5272305909617616e-05,
      "loss": 0.903,
      "step": 3300
    },
    {
      "epoch": 0.9670767261608476,
      "grad_norm": 8.035357475280762,
      "learning_rate": 1.506162435478774e-05,
      "loss": 0.9075,
      "step": 3400
    },
    {
      "epoch": 0.9955201592832255,
      "grad_norm": 8.40256404876709,
      "learning_rate": 1.4850942799957867e-05,
      "loss": 0.9129,
      "step": 3500
    },
    {
      "epoch": 1.0238924838227974,
      "grad_norm": 8.256072998046875,
      "learning_rate": 1.4640261245127992e-05,
      "loss": 0.8138,
      "step": 3600
    },
    {
      "epoch": 1.0523359169451754,
      "grad_norm": 8.351125717163086,
      "learning_rate": 1.4429579690298117e-05,
      "loss": 0.8174,
      "step": 3700
    },
    {
      "epoch": 1.080779350067553,
      "grad_norm": 16.844223022460938,
      "learning_rate": 1.4218898135468239e-05,
      "loss": 0.7941,
      "step": 3800
    },
    {
      "epoch": 1.109222783189931,
      "grad_norm": 7.787734031677246,
      "learning_rate": 1.4008216580638366e-05,
      "loss": 0.7983,
      "step": 3900
    },
    {
      "epoch": 1.1376662163123088,
      "grad_norm": 10.029424667358398,
      "learning_rate": 1.3797535025808491e-05,
      "loss": 0.8016,
      "step": 4000
    },
    {
      "epoch": 1.1376662163123088,
      "eval_f1_macro": 0.8639591530686195,
      "eval_loss": 0.21851009130477905,
      "eval_runtime": 404.5114,
      "eval_samples_per_second": 494.424,
      "eval_steps_per_second": 3.864,
      "step": 4000
    },
    {
      "epoch": 1.1661096494346868,
      "grad_norm": 10.865072250366211,
      "learning_rate": 1.3586853470978616e-05,
      "loss": 0.8246,
      "step": 4100
    },
    {
      "epoch": 1.1945530825570647,
      "grad_norm": 6.806798934936523,
      "learning_rate": 1.3376171916148742e-05,
      "loss": 0.8112,
      "step": 4200
    },
    {
      "epoch": 1.2229965156794425,
      "grad_norm": 5.789618492126465,
      "learning_rate": 1.3165490361318867e-05,
      "loss": 0.8061,
      "step": 4300
    },
    {
      "epoch": 1.2514399488018204,
      "grad_norm": 12.199695587158203,
      "learning_rate": 1.2954808806488992e-05,
      "loss": 0.8079,
      "step": 4400
    },
    {
      "epoch": 1.2798833819241984,
      "grad_norm": 10.48000431060791,
      "learning_rate": 1.2744127251659117e-05,
      "loss": 0.8118,
      "step": 4500
    },
    {
      "epoch": 1.308326815046576,
      "grad_norm": 11.554473876953125,
      "learning_rate": 1.2533445696829243e-05,
      "loss": 0.7698,
      "step": 4600
    },
    {
      "epoch": 1.336770248168954,
      "grad_norm": 7.0890374183654785,
      "learning_rate": 1.2322764141999368e-05,
      "loss": 0.7983,
      "step": 4700
    },
    {
      "epoch": 1.3652136812913318,
      "grad_norm": 7.440741539001465,
      "learning_rate": 1.2112082587169493e-05,
      "loss": 0.7862,
      "step": 4800
    },
    {
      "epoch": 1.3936571144137098,
      "grad_norm": 12.46674919128418,
      "learning_rate": 1.1901401032339618e-05,
      "loss": 0.766,
      "step": 4900
    },
    {
      "epoch": 1.4221005475360875,
      "grad_norm": 11.290918350219727,
      "learning_rate": 1.1690719477509744e-05,
      "loss": 0.7744,
      "step": 5000
    },
    {
      "epoch": 1.4221005475360875,
      "eval_f1_macro": 0.8615043778768627,
      "eval_loss": 0.22264312207698822,
      "eval_runtime": 404.4096,
      "eval_samples_per_second": 494.548,
      "eval_steps_per_second": 3.865,
      "step": 5000
    },
    {
      "epoch": 1.4505439806584655,
      "grad_norm": 7.385248184204102,
      "learning_rate": 1.148003792267987e-05,
      "loss": 0.7632,
      "step": 5100
    },
    {
      "epoch": 1.4789874137808434,
      "grad_norm": 10.126408576965332,
      "learning_rate": 1.1269356367849996e-05,
      "loss": 0.772,
      "step": 5200
    },
    {
      "epoch": 1.5074308469032212,
      "grad_norm": 8.623499870300293,
      "learning_rate": 1.1058674813020121e-05,
      "loss": 0.7809,
      "step": 5300
    },
    {
      "epoch": 1.5358742800255991,
      "grad_norm": 10.418437004089355,
      "learning_rate": 1.0847993258190246e-05,
      "loss": 0.7777,
      "step": 5400
    },
    {
      "epoch": 1.564317713147977,
      "grad_norm": 11.511435508728027,
      "learning_rate": 1.0637311703360371e-05,
      "loss": 0.7481,
      "step": 5500
    },
    {
      "epoch": 1.5927611462703548,
      "grad_norm": 7.2818121910095215,
      "learning_rate": 1.0426630148530497e-05,
      "loss": 0.7572,
      "step": 5600
    },
    {
      "epoch": 1.6212045793927325,
      "grad_norm": 10.107643127441406,
      "learning_rate": 1.0215948593700622e-05,
      "loss": 0.7687,
      "step": 5700
    },
    {
      "epoch": 1.6496480125151107,
      "grad_norm": 11.408272743225098,
      "learning_rate": 1.0005267038870747e-05,
      "loss": 0.7693,
      "step": 5800
    },
    {
      "epoch": 1.6780914456374885,
      "grad_norm": 12.303543090820312,
      "learning_rate": 9.794585484040872e-06,
      "loss": 0.7338,
      "step": 5900
    },
    {
      "epoch": 1.7065348787598662,
      "grad_norm": 10.1310396194458,
      "learning_rate": 9.583903929210998e-06,
      "loss": 0.7476,
      "step": 6000
    },
    {
      "epoch": 1.7065348787598662,
      "eval_f1_macro": 0.8754489727729019,
      "eval_loss": 0.20082785189151764,
      "eval_runtime": 404.7772,
      "eval_samples_per_second": 494.099,
      "eval_steps_per_second": 3.861,
      "step": 6000
    },
    {
      "epoch": 1.7349783118822442,
      "grad_norm": 8.645033836364746,
      "learning_rate": 9.373222374381123e-06,
      "loss": 0.7637,
      "step": 6100
    },
    {
      "epoch": 1.7634217450046221,
      "grad_norm": 9.703550338745117,
      "learning_rate": 9.162540819551248e-06,
      "loss": 0.7443,
      "step": 6200
    },
    {
      "epoch": 1.7918651781269999,
      "grad_norm": 11.156991958618164,
      "learning_rate": 8.951859264721375e-06,
      "loss": 0.7523,
      "step": 6300
    },
    {
      "epoch": 1.8203086112493778,
      "grad_norm": 9.254724502563477,
      "learning_rate": 8.7411777098915e-06,
      "loss": 0.7513,
      "step": 6400
    },
    {
      "epoch": 1.8487520443717558,
      "grad_norm": 8.819207191467285,
      "learning_rate": 8.530496155061625e-06,
      "loss": 0.7536,
      "step": 6500
    },
    {
      "epoch": 1.8771954774941335,
      "grad_norm": 10.501325607299805,
      "learning_rate": 8.31981460023175e-06,
      "loss": 0.7411,
      "step": 6600
    },
    {
      "epoch": 1.9056389106165113,
      "grad_norm": 20.624738693237305,
      "learning_rate": 8.109133045401876e-06,
      "loss": 0.7514,
      "step": 6700
    },
    {
      "epoch": 1.9340823437388894,
      "grad_norm": 8.438063621520996,
      "learning_rate": 7.898451490572001e-06,
      "loss": 0.737,
      "step": 6800
    },
    {
      "epoch": 1.9625257768612672,
      "grad_norm": 7.91304874420166,
      "learning_rate": 7.687769935742126e-06,
      "loss": 0.7252,
      "step": 6900
    },
    {
      "epoch": 1.990969209983645,
      "grad_norm": 12.667023658752441,
      "learning_rate": 7.477088380912252e-06,
      "loss": 0.7187,
      "step": 7000
    },
    {
      "epoch": 1.990969209983645,
      "eval_f1_macro": 0.8785000192236985,
      "eval_loss": 0.1944791078567505,
      "eval_runtime": 404.7549,
      "eval_samples_per_second": 494.126,
      "eval_steps_per_second": 3.862,
      "step": 7000
    },
    {
      "epoch": 2.019341534523217,
      "grad_norm": 7.297705173492432,
      "learning_rate": 7.266406826082377e-06,
      "loss": 0.6465,
      "step": 7100
    },
    {
      "epoch": 2.047784967645595,
      "grad_norm": 10.342308044433594,
      "learning_rate": 7.055725271252503e-06,
      "loss": 0.6034,
      "step": 7200
    },
    {
      "epoch": 2.0762284007679725,
      "grad_norm": 16.366321563720703,
      "learning_rate": 6.845043716422628e-06,
      "loss": 0.6199,
      "step": 7300
    },
    {
      "epoch": 2.1046718338903507,
      "grad_norm": 9.647150993347168,
      "learning_rate": 6.6343621615927535e-06,
      "loss": 0.5877,
      "step": 7400
    },
    {
      "epoch": 2.1331152670127285,
      "grad_norm": 10.496694564819336,
      "learning_rate": 6.423680606762879e-06,
      "loss": 0.6048,
      "step": 7500
    },
    {
      "epoch": 2.161558700135106,
      "grad_norm": 8.25317668914795,
      "learning_rate": 6.212999051933004e-06,
      "loss": 0.6036,
      "step": 7600
    },
    {
      "epoch": 2.1900021332574844,
      "grad_norm": 7.672098159790039,
      "learning_rate": 6.002317497103129e-06,
      "loss": 0.5875,
      "step": 7700
    },
    {
      "epoch": 2.218445566379862,
      "grad_norm": 11.453436851501465,
      "learning_rate": 5.791635942273255e-06,
      "loss": 0.5878,
      "step": 7800
    },
    {
      "epoch": 2.24688899950224,
      "grad_norm": 8.48521900177002,
      "learning_rate": 5.5809543874433805e-06,
      "loss": 0.5879,
      "step": 7900
    },
    {
      "epoch": 2.2753324326246176,
      "grad_norm": 10.552648544311523,
      "learning_rate": 5.370272832613506e-06,
      "loss": 0.6051,
      "step": 8000
    },
    {
      "epoch": 2.2753324326246176,
      "eval_f1_macro": 0.8787007793384074,
      "eval_loss": 0.20028316974639893,
      "eval_runtime": 404.6697,
      "eval_samples_per_second": 494.23,
      "eval_steps_per_second": 3.862,
      "step": 8000
    },
    {
      "epoch": 2.3037758657469958,
      "grad_norm": 7.606287479400635,
      "learning_rate": 5.159591277783631e-06,
      "loss": 0.591,
      "step": 8100
    },
    {
      "epoch": 2.3322192988693735,
      "grad_norm": 10.84433364868164,
      "learning_rate": 4.948909722953756e-06,
      "loss": 0.5676,
      "step": 8200
    },
    {
      "epoch": 2.3606627319917513,
      "grad_norm": 7.784737586975098,
      "learning_rate": 4.7382281681238814e-06,
      "loss": 0.5582,
      "step": 8300
    },
    {
      "epoch": 2.3891061651141294,
      "grad_norm": 10.117920875549316,
      "learning_rate": 4.527546613294007e-06,
      "loss": 0.5748,
      "step": 8400
    },
    {
      "epoch": 2.417549598236507,
      "grad_norm": 10.901618003845215,
      "learning_rate": 4.316865058464132e-06,
      "loss": 0.5832,
      "step": 8500
    },
    {
      "epoch": 2.445993031358885,
      "grad_norm": 8.637004852294922,
      "learning_rate": 4.106183503634257e-06,
      "loss": 0.5916,
      "step": 8600
    },
    {
      "epoch": 2.474436464481263,
      "grad_norm": 13.190328598022461,
      "learning_rate": 3.895501948804382e-06,
      "loss": 0.5849,
      "step": 8700
    },
    {
      "epoch": 2.502879897603641,
      "grad_norm": 10.942352294921875,
      "learning_rate": 3.684820393974508e-06,
      "loss": 0.5709,
      "step": 8800
    },
    {
      "epoch": 2.5313233307260186,
      "grad_norm": 10.253789901733398,
      "learning_rate": 3.474138839144633e-06,
      "loss": 0.5772,
      "step": 8900
    },
    {
      "epoch": 2.5597667638483967,
      "grad_norm": 6.905132293701172,
      "learning_rate": 3.263457284314758e-06,
      "loss": 0.579,
      "step": 9000
    },
    {
      "epoch": 2.5597667638483967,
      "eval_f1_macro": 0.8794205074585332,
      "eval_loss": 0.19818730652332306,
      "eval_runtime": 404.4575,
      "eval_samples_per_second": 494.49,
      "eval_steps_per_second": 3.864,
      "step": 9000
    },
    {
      "epoch": 2.5882101969707745,
      "grad_norm": 8.60560131072998,
      "learning_rate": 3.0527757294848838e-06,
      "loss": 0.5877,
      "step": 9100
    },
    {
      "epoch": 2.616653630093152,
      "grad_norm": 9.271297454833984,
      "learning_rate": 2.842094174655009e-06,
      "loss": 0.6023,
      "step": 9200
    },
    {
      "epoch": 2.64509706321553,
      "grad_norm": 9.618608474731445,
      "learning_rate": 2.6314126198251342e-06,
      "loss": 0.5977,
      "step": 9300
    },
    {
      "epoch": 2.673540496337908,
      "grad_norm": 7.832092761993408,
      "learning_rate": 2.42073106499526e-06,
      "loss": 0.5764,
      "step": 9400
    },
    {
      "epoch": 2.701983929460286,
      "grad_norm": 9.95085620880127,
      "learning_rate": 2.210049510165385e-06,
      "loss": 0.5693,
      "step": 9500
    },
    {
      "epoch": 2.7304273625826636,
      "grad_norm": 9.252172470092773,
      "learning_rate": 1.9993679553355104e-06,
      "loss": 0.5547,
      "step": 9600
    },
    {
      "epoch": 2.7588707957050413,
      "grad_norm": 9.34287166595459,
      "learning_rate": 1.7886864005056358e-06,
      "loss": 0.5801,
      "step": 9700
    },
    {
      "epoch": 2.7873142288274195,
      "grad_norm": 8.971467018127441,
      "learning_rate": 1.5780048456757613e-06,
      "loss": 0.5675,
      "step": 9800
    },
    {
      "epoch": 2.8157576619497973,
      "grad_norm": 9.46047306060791,
      "learning_rate": 1.3673232908458867e-06,
      "loss": 0.5669,
      "step": 9900
    },
    {
      "epoch": 2.844201095072175,
      "grad_norm": 8.73759651184082,
      "learning_rate": 1.156641736016012e-06,
      "loss": 0.5556,
      "step": 10000
    },
    {
      "epoch": 2.844201095072175,
      "eval_f1_macro": 0.8816914583342934,
      "eval_loss": 0.19616882503032684,
      "eval_runtime": 404.736,
      "eval_samples_per_second": 494.149,
      "eval_steps_per_second": 3.862,
      "step": 10000
    }
  ],
  "logging_steps": 100,
  "max_steps": 10548,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7445735094230712e+18,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}