| { |
| "best_global_step": 7604, |
| "best_metric": 0.9633717243752477, |
| "best_model_checkpoint": "camelbert_madar_task5/checkpoint-7604", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 7604, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.026301946344029457, |
| "grad_norm": 21.061479568481445, |
| "learning_rate": 1.9896896370331405e-05, |
| "loss": 0.9381, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.052603892688058915, |
| "grad_norm": 4.199251651763916, |
| "learning_rate": 1.9791688584955288e-05, |
| "loss": 0.5182, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07890583903208838, |
| "grad_norm": 22.227828979492188, |
| "learning_rate": 1.968648079957917e-05, |
| "loss": 0.4486, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.10520778537611783, |
| "grad_norm": 7.481734275817871, |
| "learning_rate": 1.9581273014203053e-05, |
| "loss": 0.4422, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1315097317201473, |
| "grad_norm": 9.7647705078125, |
| "learning_rate": 1.9476065228826936e-05, |
| "loss": 0.4304, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15781167806417676, |
| "grad_norm": 12.080931663513184, |
| "learning_rate": 1.9370857443450818e-05, |
| "loss": 0.3672, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1841136244082062, |
| "grad_norm": 11.353347778320312, |
| "learning_rate": 1.9265649658074697e-05, |
| "loss": 0.3771, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21041557075223566, |
| "grad_norm": 3.3302719593048096, |
| "learning_rate": 1.916044187269858e-05, |
| "loss": 0.4053, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.23671751709626512, |
| "grad_norm": 11.869136810302734, |
| "learning_rate": 1.9055234087322463e-05, |
| "loss": 0.3754, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2630194634402946, |
| "grad_norm": 19.71166229248047, |
| "learning_rate": 1.8950026301946345e-05, |
| "loss": 0.3909, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.289321409784324, |
| "grad_norm": 82.08606719970703, |
| "learning_rate": 1.8844818516570228e-05, |
| "loss": 0.3945, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3156233561283535, |
| "grad_norm": 5.5329389572143555, |
| "learning_rate": 1.873961073119411e-05, |
| "loss": 0.3182, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.34192530247238295, |
| "grad_norm": 10.177448272705078, |
| "learning_rate": 1.8634402945817993e-05, |
| "loss": 0.3294, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3682272488164124, |
| "grad_norm": 11.596871376037598, |
| "learning_rate": 1.8529195160441876e-05, |
| "loss": 0.3445, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3945291951604419, |
| "grad_norm": 5.0095319747924805, |
| "learning_rate": 1.8423987375065758e-05, |
| "loss": 0.3403, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4208311415044713, |
| "grad_norm": 6.569547653198242, |
| "learning_rate": 1.8318779589689638e-05, |
| "loss": 0.2767, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4471330878485008, |
| "grad_norm": 7.269279956817627, |
| "learning_rate": 1.821357180431352e-05, |
| "loss": 0.3391, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.47343503419253025, |
| "grad_norm": 6.403675079345703, |
| "learning_rate": 1.8108364018937403e-05, |
| "loss": 0.2729, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4997369805365597, |
| "grad_norm": 18.03633689880371, |
| "learning_rate": 1.8003156233561285e-05, |
| "loss": 0.2912, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5260389268805892, |
| "grad_norm": 11.609797477722168, |
| "learning_rate": 1.7897948448185168e-05, |
| "loss": 0.3678, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5523408732246187, |
| "grad_norm": 8.587767601013184, |
| "learning_rate": 1.779274066280905e-05, |
| "loss": 0.2789, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.578642819568648, |
| "grad_norm": 2.894766092300415, |
| "learning_rate": 1.768753287743293e-05, |
| "loss": 0.2515, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6049447659126775, |
| "grad_norm": 14.84619140625, |
| "learning_rate": 1.7582325092056812e-05, |
| "loss": 0.2817, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.631246712256707, |
| "grad_norm": 14.3659029006958, |
| "learning_rate": 1.7477117306680695e-05, |
| "loss": 0.2819, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6575486586007364, |
| "grad_norm": 24.962841033935547, |
| "learning_rate": 1.7371909521304578e-05, |
| "loss": 0.275, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6838506049447659, |
| "grad_norm": 2.1663622856140137, |
| "learning_rate": 1.726670173592846e-05, |
| "loss": 0.2513, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7101525512887954, |
| "grad_norm": 20.324939727783203, |
| "learning_rate": 1.7161493950552343e-05, |
| "loss": 0.2862, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7364544976328248, |
| "grad_norm": 12.115033149719238, |
| "learning_rate": 1.7056286165176222e-05, |
| "loss": 0.2489, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7627564439768543, |
| "grad_norm": 9.39247989654541, |
| "learning_rate": 1.6951078379800105e-05, |
| "loss": 0.2199, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7890583903208838, |
| "grad_norm": 11.820609092712402, |
| "learning_rate": 1.684587059442399e-05, |
| "loss": 0.2334, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.8153603366649133, |
| "grad_norm": 5.685638427734375, |
| "learning_rate": 1.6740662809047873e-05, |
| "loss": 0.2859, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8416622830089426, |
| "grad_norm": 1.4263566732406616, |
| "learning_rate": 1.6635455023671752e-05, |
| "loss": 0.2712, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8679642293529721, |
| "grad_norm": 43.12693786621094, |
| "learning_rate": 1.6530247238295635e-05, |
| "loss": 0.2236, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.8942661756970016, |
| "grad_norm": 18.322067260742188, |
| "learning_rate": 1.6425039452919518e-05, |
| "loss": 0.2176, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.920568122041031, |
| "grad_norm": 8.125885009765625, |
| "learning_rate": 1.63198316675434e-05, |
| "loss": 0.2344, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9468700683850605, |
| "grad_norm": 4.2774457931518555, |
| "learning_rate": 1.6214623882167283e-05, |
| "loss": 0.2173, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.97317201472909, |
| "grad_norm": 8.311309814453125, |
| "learning_rate": 1.6109416096791165e-05, |
| "loss": 0.207, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.9994739610731194, |
| "grad_norm": 18.770065307617188, |
| "learning_rate": 1.6004208311415045e-05, |
| "loss": 0.2261, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_FPR_aeb_Arab": 0.007114016533913859, |
| "eval_FPR_arb_Arab": 0.007306287251046666, |
| "eval_FPR_ars_Arab": 0.0274961810821419, |
| "eval_FPR_arz_Arab": 0.03769230768505917, |
| "eval_accuracy": 0.9382975924220497, |
| "eval_loss": 0.2227914035320282, |
| "eval_macro_f1": 0.8929982487077235, |
| "eval_runtime": 3.3475, |
| "eval_samples_per_second": 2270.666, |
| "eval_steps_per_second": 35.549, |
| "step": 1901 |
| }, |
| { |
| "epoch": 1.0257759074171489, |
| "grad_norm": 7.249199390411377, |
| "learning_rate": 1.5899000526038927e-05, |
| "loss": 0.1908, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.0520778537611783, |
| "grad_norm": 16.18492889404297, |
| "learning_rate": 1.579379274066281e-05, |
| "loss": 0.1919, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.0783798001052078, |
| "grad_norm": 6.383620262145996, |
| "learning_rate": 1.5688584955286692e-05, |
| "loss": 0.1662, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.1046817464492373, |
| "grad_norm": 2.7821247577667236, |
| "learning_rate": 1.5583377169910575e-05, |
| "loss": 0.1832, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1309836927932668, |
| "grad_norm": 0.20694231986999512, |
| "learning_rate": 1.5478169384534458e-05, |
| "loss": 0.1277, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.157285639137296, |
| "grad_norm": 66.66133880615234, |
| "learning_rate": 1.5372961599158337e-05, |
| "loss": 0.1896, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.1835875854813256, |
| "grad_norm": 5.3264055252075195, |
| "learning_rate": 1.526775381378222e-05, |
| "loss": 0.1535, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.209889531825355, |
| "grad_norm": 3.480900526046753, |
| "learning_rate": 1.5162546028406104e-05, |
| "loss": 0.1767, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.2361914781693846, |
| "grad_norm": 2.1541006565093994, |
| "learning_rate": 1.5057338243029986e-05, |
| "loss": 0.2361, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.262493424513414, |
| "grad_norm": 13.037530899047852, |
| "learning_rate": 1.4952130457653869e-05, |
| "loss": 0.1733, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.2887953708574433, |
| "grad_norm": 6.1545281410217285, |
| "learning_rate": 1.484692267227775e-05, |
| "loss": 0.1608, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3150973172014728, |
| "grad_norm": 1.8223601579666138, |
| "learning_rate": 1.4741714886901633e-05, |
| "loss": 0.1746, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3413992635455023, |
| "grad_norm": 3.253241777420044, |
| "learning_rate": 1.4636507101525515e-05, |
| "loss": 0.1466, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.3677012098895318, |
| "grad_norm": 3.3945982456207275, |
| "learning_rate": 1.4531299316149396e-05, |
| "loss": 0.1732, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.3940031562335613, |
| "grad_norm": 6.702133655548096, |
| "learning_rate": 1.4426091530773279e-05, |
| "loss": 0.2324, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.4203051025775908, |
| "grad_norm": 3.2291910648345947, |
| "learning_rate": 1.4320883745397161e-05, |
| "loss": 0.1615, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.4466070489216203, |
| "grad_norm": 8.065141677856445, |
| "learning_rate": 1.4215675960021042e-05, |
| "loss": 0.1668, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.4729089952656498, |
| "grad_norm": 8.395434379577637, |
| "learning_rate": 1.4110468174644925e-05, |
| "loss": 0.2002, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.499210941609679, |
| "grad_norm": 5.985948085784912, |
| "learning_rate": 1.4005260389268807e-05, |
| "loss": 0.1338, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.5255128879537085, |
| "grad_norm": 4.8504791259765625, |
| "learning_rate": 1.3900052603892688e-05, |
| "loss": 0.1493, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.551814834297738, |
| "grad_norm": 30.86811637878418, |
| "learning_rate": 1.3794844818516571e-05, |
| "loss": 0.1653, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.5781167806417675, |
| "grad_norm": 8.025301933288574, |
| "learning_rate": 1.3689637033140453e-05, |
| "loss": 0.195, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6044187269857968, |
| "grad_norm": 2.7844748497009277, |
| "learning_rate": 1.3584429247764334e-05, |
| "loss": 0.1513, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.6307206733298263, |
| "grad_norm": 15.212594032287598, |
| "learning_rate": 1.3479221462388219e-05, |
| "loss": 0.1311, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.6570226196738558, |
| "grad_norm": 7.984399795532227, |
| "learning_rate": 1.3374013677012101e-05, |
| "loss": 0.1699, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.6833245660178853, |
| "grad_norm": 2.66343092918396, |
| "learning_rate": 1.3268805891635982e-05, |
| "loss": 0.0987, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.7096265123619148, |
| "grad_norm": 1.7281841039657593, |
| "learning_rate": 1.3163598106259865e-05, |
| "loss": 0.1468, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.7359284587059443, |
| "grad_norm": 80.2880859375, |
| "learning_rate": 1.3058390320883747e-05, |
| "loss": 0.1225, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.7622304050499737, |
| "grad_norm": 3.2839515209198, |
| "learning_rate": 1.2953182535507628e-05, |
| "loss": 0.1612, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.7885323513940032, |
| "grad_norm": 6.35798978805542, |
| "learning_rate": 1.2847974750131511e-05, |
| "loss": 0.1319, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.8148342977380327, |
| "grad_norm": 17.910255432128906, |
| "learning_rate": 1.2742766964755394e-05, |
| "loss": 0.2161, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.8411362440820622, |
| "grad_norm": 2.275036573410034, |
| "learning_rate": 1.2637559179379274e-05, |
| "loss": 0.1118, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.8674381904260915, |
| "grad_norm": 20.091514587402344, |
| "learning_rate": 1.2532351394003157e-05, |
| "loss": 0.1463, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.893740136770121, |
| "grad_norm": 0.5615454912185669, |
| "learning_rate": 1.242714360862704e-05, |
| "loss": 0.1648, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.9200420831141505, |
| "grad_norm": 3.871091604232788, |
| "learning_rate": 1.232193582325092e-05, |
| "loss": 0.1325, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.9463440294581797, |
| "grad_norm": 1.768117904663086, |
| "learning_rate": 1.2216728037874803e-05, |
| "loss": 0.1664, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.9726459758022092, |
| "grad_norm": 5.8534393310546875, |
| "learning_rate": 1.2111520252498686e-05, |
| "loss": 0.1578, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.9989479221462387, |
| "grad_norm": 3.766312837600708, |
| "learning_rate": 1.2006312467122567e-05, |
| "loss": 0.1393, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_FPR_aeb_Arab": 0.00384541434265614, |
| "eval_FPR_arb_Arab": 0.02134204960174158, |
| "eval_FPR_ars_Arab": 0.01041522010687193, |
| "eval_FPR_arz_Arab": 0.020192307688424557, |
| "eval_accuracy": 0.9590843310090778, |
| "eval_loss": 0.16003794968128204, |
| "eval_macro_f1": 0.937683933464698, |
| "eval_runtime": 3.3754, |
| "eval_samples_per_second": 2251.882, |
| "eval_steps_per_second": 35.255, |
| "step": 3802 |
| }, |
| { |
| "epoch": 2.0252498684902682, |
| "grad_norm": 14.620624542236328, |
| "learning_rate": 1.190110468174645e-05, |
| "loss": 0.073, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.0515518148342977, |
| "grad_norm": 1.2938824892044067, |
| "learning_rate": 1.1795896896370332e-05, |
| "loss": 0.1148, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.077853761178327, |
| "grad_norm": 3.313081979751587, |
| "learning_rate": 1.1690689110994216e-05, |
| "loss": 0.0746, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.1041557075223567, |
| "grad_norm": 2.0338821411132812, |
| "learning_rate": 1.1585481325618097e-05, |
| "loss": 0.0977, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.130457653866386, |
| "grad_norm": 0.055320367217063904, |
| "learning_rate": 1.148027354024198e-05, |
| "loss": 0.096, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.1567596002104157, |
| "grad_norm": 1.0964843034744263, |
| "learning_rate": 1.1375065754865862e-05, |
| "loss": 0.0642, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.183061546554445, |
| "grad_norm": 1.0340650081634521, |
| "learning_rate": 1.1269857969489743e-05, |
| "loss": 0.1007, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.2093634928984747, |
| "grad_norm": 4.971868515014648, |
| "learning_rate": 1.1164650184113626e-05, |
| "loss": 0.1083, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.2356654392425037, |
| "grad_norm": 0.49501538276672363, |
| "learning_rate": 1.1059442398737508e-05, |
| "loss": 0.1068, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.2619673855865337, |
| "grad_norm": 6.13097620010376, |
| "learning_rate": 1.095423461336139e-05, |
| "loss": 0.0946, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.2882693319305627, |
| "grad_norm": 5.904395580291748, |
| "learning_rate": 1.0849026827985272e-05, |
| "loss": 0.0758, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.314571278274592, |
| "grad_norm": 4.2567138671875, |
| "learning_rate": 1.0743819042609155e-05, |
| "loss": 0.111, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.3408732246186217, |
| "grad_norm": 0.1440172791481018, |
| "learning_rate": 1.0638611257233035e-05, |
| "loss": 0.1104, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.367175170962651, |
| "grad_norm": 7.970292091369629, |
| "learning_rate": 1.0533403471856918e-05, |
| "loss": 0.0891, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.3934771173066807, |
| "grad_norm": 2.4047350883483887, |
| "learning_rate": 1.04281956864808e-05, |
| "loss": 0.1242, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.41977906365071, |
| "grad_norm": 14.3352689743042, |
| "learning_rate": 1.0322987901104682e-05, |
| "loss": 0.0649, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.4460810099947397, |
| "grad_norm": 25.1345157623291, |
| "learning_rate": 1.0217780115728564e-05, |
| "loss": 0.0712, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.472382956338769, |
| "grad_norm": 1.9517714977264404, |
| "learning_rate": 1.0112572330352445e-05, |
| "loss": 0.1032, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.4986849026827986, |
| "grad_norm": 1.327062726020813, |
| "learning_rate": 1.000736454497633e-05, |
| "loss": 0.0962, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.524986849026828, |
| "grad_norm": 10.327136993408203, |
| "learning_rate": 9.90215675960021e-06, |
| "loss": 0.1092, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.5512887953708576, |
| "grad_norm": 3.8997962474823, |
| "learning_rate": 9.796948974224093e-06, |
| "loss": 0.0681, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.5775907417148867, |
| "grad_norm": 0.270841121673584, |
| "learning_rate": 9.691741188847975e-06, |
| "loss": 0.1265, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.6038926880589166, |
| "grad_norm": 0.8220506906509399, |
| "learning_rate": 9.586533403471858e-06, |
| "loss": 0.0726, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.6301946344029457, |
| "grad_norm": 1.4264813661575317, |
| "learning_rate": 9.48132561809574e-06, |
| "loss": 0.0707, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.656496580746975, |
| "grad_norm": 5.427404880523682, |
| "learning_rate": 9.376117832719622e-06, |
| "loss": 0.0762, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.6827985270910046, |
| "grad_norm": 39.103004455566406, |
| "learning_rate": 9.270910047343504e-06, |
| "loss": 0.0733, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.709100473435034, |
| "grad_norm": 2.8170275688171387, |
| "learning_rate": 9.165702261967387e-06, |
| "loss": 0.105, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.7354024197790636, |
| "grad_norm": 6.285243034362793, |
| "learning_rate": 9.060494476591268e-06, |
| "loss": 0.1054, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.761704366123093, |
| "grad_norm": 34.959102630615234, |
| "learning_rate": 8.95528669121515e-06, |
| "loss": 0.1168, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.7880063124671226, |
| "grad_norm": 2.698047399520874, |
| "learning_rate": 8.850078905839033e-06, |
| "loss": 0.0664, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.814308258811152, |
| "grad_norm": 6.107056617736816, |
| "learning_rate": 8.744871120462914e-06, |
| "loss": 0.0866, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.8406102051551816, |
| "grad_norm": 6.0492634773254395, |
| "learning_rate": 8.639663335086798e-06, |
| "loss": 0.0921, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.866912151499211, |
| "grad_norm": 38.75687789916992, |
| "learning_rate": 8.534455549710679e-06, |
| "loss": 0.0932, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.8932140978432406, |
| "grad_norm": 5.730583190917969, |
| "learning_rate": 8.429247764334562e-06, |
| "loss": 0.0809, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.9195160441872696, |
| "grad_norm": 0.2023005187511444, |
| "learning_rate": 8.324039978958444e-06, |
| "loss": 0.0723, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.9458179905312996, |
| "grad_norm": 24.816850662231445, |
| "learning_rate": 8.218832193582325e-06, |
| "loss": 0.0758, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.9721199368753286, |
| "grad_norm": 0.10021505504846573, |
| "learning_rate": 8.113624408206208e-06, |
| "loss": 0.0787, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.998421883219358, |
| "grad_norm": 3.8389430046081543, |
| "learning_rate": 8.00841662283009e-06, |
| "loss": 0.1321, |
| "step": 5700 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_FPR_aeb_Arab": 0.004037685059788947, |
| "eval_FPR_arb_Arab": 0.009421265139507543, |
| "eval_FPR_ars_Arab": 0.005971392861273241, |
| "eval_FPR_arz_Arab": 0.020192307688424557, |
| "eval_accuracy": 0.9713195632153664, |
| "eval_loss": 0.15336963534355164, |
| "eval_macro_f1": 0.9569564393242584, |
| "eval_runtime": 3.3689, |
| "eval_samples_per_second": 2256.259, |
| "eval_steps_per_second": 35.324, |
| "step": 5703 |
| }, |
| { |
| "epoch": 3.0247238295633876, |
| "grad_norm": 0.30554988980293274, |
| "learning_rate": 7.903208837453971e-06, |
| "loss": 0.0937, |
| "step": 5750 |
| }, |
| { |
| "epoch": 3.051025775907417, |
| "grad_norm": 37.439884185791016, |
| "learning_rate": 7.798001052077856e-06, |
| "loss": 0.0578, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.0773277222514466, |
| "grad_norm": 0.0822492390871048, |
| "learning_rate": 7.692793266701737e-06, |
| "loss": 0.0636, |
| "step": 5850 |
| }, |
| { |
| "epoch": 3.103629668595476, |
| "grad_norm": 2.7918007373809814, |
| "learning_rate": 7.587585481325619e-06, |
| "loss": 0.0378, |
| "step": 5900 |
| }, |
| { |
| "epoch": 3.1299316149395056, |
| "grad_norm": 32.899818420410156, |
| "learning_rate": 7.482377695949501e-06, |
| "loss": 0.0609, |
| "step": 5950 |
| }, |
| { |
| "epoch": 3.156233561283535, |
| "grad_norm": 0.06830895692110062, |
| "learning_rate": 7.377169910573383e-06, |
| "loss": 0.0433, |
| "step": 6000 |
| }, |
| { |
| "epoch": 3.1825355076275645, |
| "grad_norm": 54.685489654541016, |
| "learning_rate": 7.271962125197265e-06, |
| "loss": 0.056, |
| "step": 6050 |
| }, |
| { |
| "epoch": 3.208837453971594, |
| "grad_norm": 0.8175523281097412, |
| "learning_rate": 7.166754339821147e-06, |
| "loss": 0.0341, |
| "step": 6100 |
| }, |
| { |
| "epoch": 3.2351394003156235, |
| "grad_norm": 0.33226722478866577, |
| "learning_rate": 7.061546554445029e-06, |
| "loss": 0.0482, |
| "step": 6150 |
| }, |
| { |
| "epoch": 3.2614413466596526, |
| "grad_norm": 1.425661325454712, |
| "learning_rate": 6.956338769068912e-06, |
| "loss": 0.0673, |
| "step": 6200 |
| }, |
| { |
| "epoch": 3.2877432930036825, |
| "grad_norm": 0.18895921111106873, |
| "learning_rate": 6.851130983692794e-06, |
| "loss": 0.0359, |
| "step": 6250 |
| }, |
| { |
| "epoch": 3.3140452393477116, |
| "grad_norm": 0.6557305455207825, |
| "learning_rate": 6.7459231983166766e-06, |
| "loss": 0.0382, |
| "step": 6300 |
| }, |
| { |
| "epoch": 3.340347185691741, |
| "grad_norm": 0.008198770694434643, |
| "learning_rate": 6.640715412940558e-06, |
| "loss": 0.0566, |
| "step": 6350 |
| }, |
| { |
| "epoch": 3.3666491320357705, |
| "grad_norm": 0.4695976674556732, |
| "learning_rate": 6.53550762756444e-06, |
| "loss": 0.0654, |
| "step": 6400 |
| }, |
| { |
| "epoch": 3.3929510783798, |
| "grad_norm": 8.628214836120605, |
| "learning_rate": 6.430299842188323e-06, |
| "loss": 0.0427, |
| "step": 6450 |
| }, |
| { |
| "epoch": 3.4192530247238295, |
| "grad_norm": 0.9650713801383972, |
| "learning_rate": 6.3250920568122044e-06, |
| "loss": 0.0645, |
| "step": 6500 |
| }, |
| { |
| "epoch": 3.445554971067859, |
| "grad_norm": 5.836668968200684, |
| "learning_rate": 6.219884271436086e-06, |
| "loss": 0.0397, |
| "step": 6550 |
| }, |
| { |
| "epoch": 3.4718569174118885, |
| "grad_norm": 0.03976545110344887, |
| "learning_rate": 6.11467648605997e-06, |
| "loss": 0.0586, |
| "step": 6600 |
| }, |
| { |
| "epoch": 3.498158863755918, |
| "grad_norm": 19.784215927124023, |
| "learning_rate": 6.009468700683851e-06, |
| "loss": 0.033, |
| "step": 6650 |
| }, |
| { |
| "epoch": 3.5244608100999475, |
| "grad_norm": 2.075496196746826, |
| "learning_rate": 5.904260915307733e-06, |
| "loss": 0.0776, |
| "step": 6700 |
| }, |
| { |
| "epoch": 3.550762756443977, |
| "grad_norm": 7.05810022354126, |
| "learning_rate": 5.799053129931616e-06, |
| "loss": 0.0905, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.5770647027880065, |
| "grad_norm": 0.012984913773834705, |
| "learning_rate": 5.6938453445554975e-06, |
| "loss": 0.0542, |
| "step": 6800 |
| }, |
| { |
| "epoch": 3.6033666491320355, |
| "grad_norm": 2.701481342315674, |
| "learning_rate": 5.588637559179379e-06, |
| "loss": 0.0625, |
| "step": 6850 |
| }, |
| { |
| "epoch": 3.6296685954760655, |
| "grad_norm": 0.41872379183769226, |
| "learning_rate": 5.483429773803262e-06, |
| "loss": 0.0795, |
| "step": 6900 |
| }, |
| { |
| "epoch": 3.6559705418200945, |
| "grad_norm": 0.13123294711112976, |
| "learning_rate": 5.378221988427144e-06, |
| "loss": 0.0296, |
| "step": 6950 |
| }, |
| { |
| "epoch": 3.682272488164124, |
| "grad_norm": 0.7190969586372375, |
| "learning_rate": 5.273014203051027e-06, |
| "loss": 0.0666, |
| "step": 7000 |
| }, |
| { |
| "epoch": 3.7085744345081535, |
| "grad_norm": 0.1744261384010315, |
| "learning_rate": 5.167806417674909e-06, |
| "loss": 0.0328, |
| "step": 7050 |
| }, |
| { |
| "epoch": 3.734876380852183, |
| "grad_norm": 0.5619340538978577, |
| "learning_rate": 5.062598632298791e-06, |
| "loss": 0.0755, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.7611783271962125, |
| "grad_norm": 40.665706634521484, |
| "learning_rate": 4.957390846922673e-06, |
| "loss": 0.1041, |
| "step": 7150 |
| }, |
| { |
| "epoch": 3.787480273540242, |
| "grad_norm": 0.06617475301027298, |
| "learning_rate": 4.852183061546555e-06, |
| "loss": 0.0264, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.8137822198842715, |
| "grad_norm": 5.0283966064453125, |
| "learning_rate": 4.746975276170437e-06, |
| "loss": 0.0789, |
| "step": 7250 |
| }, |
| { |
| "epoch": 3.840084166228301, |
| "grad_norm": 5.660898208618164, |
| "learning_rate": 4.641767490794319e-06, |
| "loss": 0.0582, |
| "step": 7300 |
| }, |
| { |
| "epoch": 3.8663861125723304, |
| "grad_norm": 0.8503484725952148, |
| "learning_rate": 4.536559705418201e-06, |
| "loss": 0.0862, |
| "step": 7350 |
| }, |
| { |
| "epoch": 3.89268805891636, |
| "grad_norm": 13.575056076049805, |
| "learning_rate": 4.431351920042084e-06, |
| "loss": 0.0554, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.9189900052603894, |
| "grad_norm": 0.25003504753112793, |
| "learning_rate": 4.3261441346659654e-06, |
| "loss": 0.0504, |
| "step": 7450 |
| }, |
| { |
| "epoch": 3.9452919516044185, |
| "grad_norm": 0.022247493267059326, |
| "learning_rate": 4.220936349289847e-06, |
| "loss": 0.0663, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.9715938979484484, |
| "grad_norm": 0.2591884136199951, |
| "learning_rate": 4.11572856391373e-06, |
| "loss": 0.0361, |
| "step": 7550 |
| }, |
| { |
| "epoch": 3.9978958442924775, |
| "grad_norm": 6.533713340759277, |
| "learning_rate": 4.010520778537612e-06, |
| "loss": 0.0293, |
| "step": 7600 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_FPR_aeb_Arab": 0.004229955776921754, |
| "eval_FPR_arb_Arab": 0.011343972310835613, |
| "eval_FPR_ars_Arab": 0.00458269684702365, |
| "eval_FPR_arz_Arab": 0.015576923073927515, |
| "eval_accuracy": 0.9743454808577818, |
| "eval_loss": 0.15085552632808685, |
| "eval_macro_f1": 0.9633717243752477, |
| "eval_runtime": 3.3689, |
| "eval_samples_per_second": 2256.225, |
| "eval_steps_per_second": 35.323, |
| "step": 7604 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 9505, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3440682832634112.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |