{
"best_global_step": 7604,
"best_metric": 0.9633717243752477,
"best_model_checkpoint": "camelbert_madar_task5/checkpoint-7604",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 7604,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.026301946344029457,
"grad_norm": 21.061479568481445,
"learning_rate": 1.9896896370331405e-05,
"loss": 0.9381,
"step": 50
},
{
"epoch": 0.052603892688058915,
"grad_norm": 4.199251651763916,
"learning_rate": 1.9791688584955288e-05,
"loss": 0.5182,
"step": 100
},
{
"epoch": 0.07890583903208838,
"grad_norm": 22.227828979492188,
"learning_rate": 1.968648079957917e-05,
"loss": 0.4486,
"step": 150
},
{
"epoch": 0.10520778537611783,
"grad_norm": 7.481734275817871,
"learning_rate": 1.9581273014203053e-05,
"loss": 0.4422,
"step": 200
},
{
"epoch": 0.1315097317201473,
"grad_norm": 9.7647705078125,
"learning_rate": 1.9476065228826936e-05,
"loss": 0.4304,
"step": 250
},
{
"epoch": 0.15781167806417676,
"grad_norm": 12.080931663513184,
"learning_rate": 1.9370857443450818e-05,
"loss": 0.3672,
"step": 300
},
{
"epoch": 0.1841136244082062,
"grad_norm": 11.353347778320312,
"learning_rate": 1.9265649658074697e-05,
"loss": 0.3771,
"step": 350
},
{
"epoch": 0.21041557075223566,
"grad_norm": 3.3302719593048096,
"learning_rate": 1.916044187269858e-05,
"loss": 0.4053,
"step": 400
},
{
"epoch": 0.23671751709626512,
"grad_norm": 11.869136810302734,
"learning_rate": 1.9055234087322463e-05,
"loss": 0.3754,
"step": 450
},
{
"epoch": 0.2630194634402946,
"grad_norm": 19.71166229248047,
"learning_rate": 1.8950026301946345e-05,
"loss": 0.3909,
"step": 500
},
{
"epoch": 0.289321409784324,
"grad_norm": 82.08606719970703,
"learning_rate": 1.8844818516570228e-05,
"loss": 0.3945,
"step": 550
},
{
"epoch": 0.3156233561283535,
"grad_norm": 5.5329389572143555,
"learning_rate": 1.873961073119411e-05,
"loss": 0.3182,
"step": 600
},
{
"epoch": 0.34192530247238295,
"grad_norm": 10.177448272705078,
"learning_rate": 1.8634402945817993e-05,
"loss": 0.3294,
"step": 650
},
{
"epoch": 0.3682272488164124,
"grad_norm": 11.596871376037598,
"learning_rate": 1.8529195160441876e-05,
"loss": 0.3445,
"step": 700
},
{
"epoch": 0.3945291951604419,
"grad_norm": 5.0095319747924805,
"learning_rate": 1.8423987375065758e-05,
"loss": 0.3403,
"step": 750
},
{
"epoch": 0.4208311415044713,
"grad_norm": 6.569547653198242,
"learning_rate": 1.8318779589689638e-05,
"loss": 0.2767,
"step": 800
},
{
"epoch": 0.4471330878485008,
"grad_norm": 7.269279956817627,
"learning_rate": 1.821357180431352e-05,
"loss": 0.3391,
"step": 850
},
{
"epoch": 0.47343503419253025,
"grad_norm": 6.403675079345703,
"learning_rate": 1.8108364018937403e-05,
"loss": 0.2729,
"step": 900
},
{
"epoch": 0.4997369805365597,
"grad_norm": 18.03633689880371,
"learning_rate": 1.8003156233561285e-05,
"loss": 0.2912,
"step": 950
},
{
"epoch": 0.5260389268805892,
"grad_norm": 11.609797477722168,
"learning_rate": 1.7897948448185168e-05,
"loss": 0.3678,
"step": 1000
},
{
"epoch": 0.5523408732246187,
"grad_norm": 8.587767601013184,
"learning_rate": 1.779274066280905e-05,
"loss": 0.2789,
"step": 1050
},
{
"epoch": 0.578642819568648,
"grad_norm": 2.894766092300415,
"learning_rate": 1.768753287743293e-05,
"loss": 0.2515,
"step": 1100
},
{
"epoch": 0.6049447659126775,
"grad_norm": 14.84619140625,
"learning_rate": 1.7582325092056812e-05,
"loss": 0.2817,
"step": 1150
},
{
"epoch": 0.631246712256707,
"grad_norm": 14.3659029006958,
"learning_rate": 1.7477117306680695e-05,
"loss": 0.2819,
"step": 1200
},
{
"epoch": 0.6575486586007364,
"grad_norm": 24.962841033935547,
"learning_rate": 1.7371909521304578e-05,
"loss": 0.275,
"step": 1250
},
{
"epoch": 0.6838506049447659,
"grad_norm": 2.1663622856140137,
"learning_rate": 1.726670173592846e-05,
"loss": 0.2513,
"step": 1300
},
{
"epoch": 0.7101525512887954,
"grad_norm": 20.324939727783203,
"learning_rate": 1.7161493950552343e-05,
"loss": 0.2862,
"step": 1350
},
{
"epoch": 0.7364544976328248,
"grad_norm": 12.115033149719238,
"learning_rate": 1.7056286165176222e-05,
"loss": 0.2489,
"step": 1400
},
{
"epoch": 0.7627564439768543,
"grad_norm": 9.39247989654541,
"learning_rate": 1.6951078379800105e-05,
"loss": 0.2199,
"step": 1450
},
{
"epoch": 0.7890583903208838,
"grad_norm": 11.820609092712402,
"learning_rate": 1.684587059442399e-05,
"loss": 0.2334,
"step": 1500
},
{
"epoch": 0.8153603366649133,
"grad_norm": 5.685638427734375,
"learning_rate": 1.6740662809047873e-05,
"loss": 0.2859,
"step": 1550
},
{
"epoch": 0.8416622830089426,
"grad_norm": 1.4263566732406616,
"learning_rate": 1.6635455023671752e-05,
"loss": 0.2712,
"step": 1600
},
{
"epoch": 0.8679642293529721,
"grad_norm": 43.12693786621094,
"learning_rate": 1.6530247238295635e-05,
"loss": 0.2236,
"step": 1650
},
{
"epoch": 0.8942661756970016,
"grad_norm": 18.322067260742188,
"learning_rate": 1.6425039452919518e-05,
"loss": 0.2176,
"step": 1700
},
{
"epoch": 0.920568122041031,
"grad_norm": 8.125885009765625,
"learning_rate": 1.63198316675434e-05,
"loss": 0.2344,
"step": 1750
},
{
"epoch": 0.9468700683850605,
"grad_norm": 4.2774457931518555,
"learning_rate": 1.6214623882167283e-05,
"loss": 0.2173,
"step": 1800
},
{
"epoch": 0.97317201472909,
"grad_norm": 8.311309814453125,
"learning_rate": 1.6109416096791165e-05,
"loss": 0.207,
"step": 1850
},
{
"epoch": 0.9994739610731194,
"grad_norm": 18.770065307617188,
"learning_rate": 1.6004208311415045e-05,
"loss": 0.2261,
"step": 1900
},
{
"epoch": 1.0,
"eval_FPR_aeb_Arab": 0.007114016533913859,
"eval_FPR_arb_Arab": 0.007306287251046666,
"eval_FPR_ars_Arab": 0.0274961810821419,
"eval_FPR_arz_Arab": 0.03769230768505917,
"eval_accuracy": 0.9382975924220497,
"eval_loss": 0.2227914035320282,
"eval_macro_f1": 0.8929982487077235,
"eval_runtime": 3.3475,
"eval_samples_per_second": 2270.666,
"eval_steps_per_second": 35.549,
"step": 1901
},
{
"epoch": 1.0257759074171489,
"grad_norm": 7.249199390411377,
"learning_rate": 1.5899000526038927e-05,
"loss": 0.1908,
"step": 1950
},
{
"epoch": 1.0520778537611783,
"grad_norm": 16.18492889404297,
"learning_rate": 1.579379274066281e-05,
"loss": 0.1919,
"step": 2000
},
{
"epoch": 1.0783798001052078,
"grad_norm": 6.383620262145996,
"learning_rate": 1.5688584955286692e-05,
"loss": 0.1662,
"step": 2050
},
{
"epoch": 1.1046817464492373,
"grad_norm": 2.7821247577667236,
"learning_rate": 1.5583377169910575e-05,
"loss": 0.1832,
"step": 2100
},
{
"epoch": 1.1309836927932668,
"grad_norm": 0.20694231986999512,
"learning_rate": 1.5478169384534458e-05,
"loss": 0.1277,
"step": 2150
},
{
"epoch": 1.157285639137296,
"grad_norm": 66.66133880615234,
"learning_rate": 1.5372961599158337e-05,
"loss": 0.1896,
"step": 2200
},
{
"epoch": 1.1835875854813256,
"grad_norm": 5.3264055252075195,
"learning_rate": 1.526775381378222e-05,
"loss": 0.1535,
"step": 2250
},
{
"epoch": 1.209889531825355,
"grad_norm": 3.480900526046753,
"learning_rate": 1.5162546028406104e-05,
"loss": 0.1767,
"step": 2300
},
{
"epoch": 1.2361914781693846,
"grad_norm": 2.1541006565093994,
"learning_rate": 1.5057338243029986e-05,
"loss": 0.2361,
"step": 2350
},
{
"epoch": 1.262493424513414,
"grad_norm": 13.037530899047852,
"learning_rate": 1.4952130457653869e-05,
"loss": 0.1733,
"step": 2400
},
{
"epoch": 1.2887953708574433,
"grad_norm": 6.1545281410217285,
"learning_rate": 1.484692267227775e-05,
"loss": 0.1608,
"step": 2450
},
{
"epoch": 1.3150973172014728,
"grad_norm": 1.8223601579666138,
"learning_rate": 1.4741714886901633e-05,
"loss": 0.1746,
"step": 2500
},
{
"epoch": 1.3413992635455023,
"grad_norm": 3.253241777420044,
"learning_rate": 1.4636507101525515e-05,
"loss": 0.1466,
"step": 2550
},
{
"epoch": 1.3677012098895318,
"grad_norm": 3.3945982456207275,
"learning_rate": 1.4531299316149396e-05,
"loss": 0.1732,
"step": 2600
},
{
"epoch": 1.3940031562335613,
"grad_norm": 6.702133655548096,
"learning_rate": 1.4426091530773279e-05,
"loss": 0.2324,
"step": 2650
},
{
"epoch": 1.4203051025775908,
"grad_norm": 3.2291910648345947,
"learning_rate": 1.4320883745397161e-05,
"loss": 0.1615,
"step": 2700
},
{
"epoch": 1.4466070489216203,
"grad_norm": 8.065141677856445,
"learning_rate": 1.4215675960021042e-05,
"loss": 0.1668,
"step": 2750
},
{
"epoch": 1.4729089952656498,
"grad_norm": 8.395434379577637,
"learning_rate": 1.4110468174644925e-05,
"loss": 0.2002,
"step": 2800
},
{
"epoch": 1.499210941609679,
"grad_norm": 5.985948085784912,
"learning_rate": 1.4005260389268807e-05,
"loss": 0.1338,
"step": 2850
},
{
"epoch": 1.5255128879537085,
"grad_norm": 4.8504791259765625,
"learning_rate": 1.3900052603892688e-05,
"loss": 0.1493,
"step": 2900
},
{
"epoch": 1.551814834297738,
"grad_norm": 30.86811637878418,
"learning_rate": 1.3794844818516571e-05,
"loss": 0.1653,
"step": 2950
},
{
"epoch": 1.5781167806417675,
"grad_norm": 8.025301933288574,
"learning_rate": 1.3689637033140453e-05,
"loss": 0.195,
"step": 3000
},
{
"epoch": 1.6044187269857968,
"grad_norm": 2.7844748497009277,
"learning_rate": 1.3584429247764334e-05,
"loss": 0.1513,
"step": 3050
},
{
"epoch": 1.6307206733298263,
"grad_norm": 15.212594032287598,
"learning_rate": 1.3479221462388219e-05,
"loss": 0.1311,
"step": 3100
},
{
"epoch": 1.6570226196738558,
"grad_norm": 7.984399795532227,
"learning_rate": 1.3374013677012101e-05,
"loss": 0.1699,
"step": 3150
},
{
"epoch": 1.6833245660178853,
"grad_norm": 2.66343092918396,
"learning_rate": 1.3268805891635982e-05,
"loss": 0.0987,
"step": 3200
},
{
"epoch": 1.7096265123619148,
"grad_norm": 1.7281841039657593,
"learning_rate": 1.3163598106259865e-05,
"loss": 0.1468,
"step": 3250
},
{
"epoch": 1.7359284587059443,
"grad_norm": 80.2880859375,
"learning_rate": 1.3058390320883747e-05,
"loss": 0.1225,
"step": 3300
},
{
"epoch": 1.7622304050499737,
"grad_norm": 3.2839515209198,
"learning_rate": 1.2953182535507628e-05,
"loss": 0.1612,
"step": 3350
},
{
"epoch": 1.7885323513940032,
"grad_norm": 6.35798978805542,
"learning_rate": 1.2847974750131511e-05,
"loss": 0.1319,
"step": 3400
},
{
"epoch": 1.8148342977380327,
"grad_norm": 17.910255432128906,
"learning_rate": 1.2742766964755394e-05,
"loss": 0.2161,
"step": 3450
},
{
"epoch": 1.8411362440820622,
"grad_norm": 2.275036573410034,
"learning_rate": 1.2637559179379274e-05,
"loss": 0.1118,
"step": 3500
},
{
"epoch": 1.8674381904260915,
"grad_norm": 20.091514587402344,
"learning_rate": 1.2532351394003157e-05,
"loss": 0.1463,
"step": 3550
},
{
"epoch": 1.893740136770121,
"grad_norm": 0.5615454912185669,
"learning_rate": 1.242714360862704e-05,
"loss": 0.1648,
"step": 3600
},
{
"epoch": 1.9200420831141505,
"grad_norm": 3.871091604232788,
"learning_rate": 1.232193582325092e-05,
"loss": 0.1325,
"step": 3650
},
{
"epoch": 1.9463440294581797,
"grad_norm": 1.768117904663086,
"learning_rate": 1.2216728037874803e-05,
"loss": 0.1664,
"step": 3700
},
{
"epoch": 1.9726459758022092,
"grad_norm": 5.8534393310546875,
"learning_rate": 1.2111520252498686e-05,
"loss": 0.1578,
"step": 3750
},
{
"epoch": 1.9989479221462387,
"grad_norm": 3.766312837600708,
"learning_rate": 1.2006312467122567e-05,
"loss": 0.1393,
"step": 3800
},
{
"epoch": 2.0,
"eval_FPR_aeb_Arab": 0.00384541434265614,
"eval_FPR_arb_Arab": 0.02134204960174158,
"eval_FPR_ars_Arab": 0.01041522010687193,
"eval_FPR_arz_Arab": 0.020192307688424557,
"eval_accuracy": 0.9590843310090778,
"eval_loss": 0.16003794968128204,
"eval_macro_f1": 0.937683933464698,
"eval_runtime": 3.3754,
"eval_samples_per_second": 2251.882,
"eval_steps_per_second": 35.255,
"step": 3802
},
{
"epoch": 2.0252498684902682,
"grad_norm": 14.620624542236328,
"learning_rate": 1.190110468174645e-05,
"loss": 0.073,
"step": 3850
},
{
"epoch": 2.0515518148342977,
"grad_norm": 1.2938824892044067,
"learning_rate": 1.1795896896370332e-05,
"loss": 0.1148,
"step": 3900
},
{
"epoch": 2.077853761178327,
"grad_norm": 3.313081979751587,
"learning_rate": 1.1690689110994216e-05,
"loss": 0.0746,
"step": 3950
},
{
"epoch": 2.1041557075223567,
"grad_norm": 2.0338821411132812,
"learning_rate": 1.1585481325618097e-05,
"loss": 0.0977,
"step": 4000
},
{
"epoch": 2.130457653866386,
"grad_norm": 0.055320367217063904,
"learning_rate": 1.148027354024198e-05,
"loss": 0.096,
"step": 4050
},
{
"epoch": 2.1567596002104157,
"grad_norm": 1.0964843034744263,
"learning_rate": 1.1375065754865862e-05,
"loss": 0.0642,
"step": 4100
},
{
"epoch": 2.183061546554445,
"grad_norm": 1.0340650081634521,
"learning_rate": 1.1269857969489743e-05,
"loss": 0.1007,
"step": 4150
},
{
"epoch": 2.2093634928984747,
"grad_norm": 4.971868515014648,
"learning_rate": 1.1164650184113626e-05,
"loss": 0.1083,
"step": 4200
},
{
"epoch": 2.2356654392425037,
"grad_norm": 0.49501538276672363,
"learning_rate": 1.1059442398737508e-05,
"loss": 0.1068,
"step": 4250
},
{
"epoch": 2.2619673855865337,
"grad_norm": 6.13097620010376,
"learning_rate": 1.095423461336139e-05,
"loss": 0.0946,
"step": 4300
},
{
"epoch": 2.2882693319305627,
"grad_norm": 5.904395580291748,
"learning_rate": 1.0849026827985272e-05,
"loss": 0.0758,
"step": 4350
},
{
"epoch": 2.314571278274592,
"grad_norm": 4.2567138671875,
"learning_rate": 1.0743819042609155e-05,
"loss": 0.111,
"step": 4400
},
{
"epoch": 2.3408732246186217,
"grad_norm": 0.1440172791481018,
"learning_rate": 1.0638611257233035e-05,
"loss": 0.1104,
"step": 4450
},
{
"epoch": 2.367175170962651,
"grad_norm": 7.970292091369629,
"learning_rate": 1.0533403471856918e-05,
"loss": 0.0891,
"step": 4500
},
{
"epoch": 2.3934771173066807,
"grad_norm": 2.4047350883483887,
"learning_rate": 1.04281956864808e-05,
"loss": 0.1242,
"step": 4550
},
{
"epoch": 2.41977906365071,
"grad_norm": 14.3352689743042,
"learning_rate": 1.0322987901104682e-05,
"loss": 0.0649,
"step": 4600
},
{
"epoch": 2.4460810099947397,
"grad_norm": 25.1345157623291,
"learning_rate": 1.0217780115728564e-05,
"loss": 0.0712,
"step": 4650
},
{
"epoch": 2.472382956338769,
"grad_norm": 1.9517714977264404,
"learning_rate": 1.0112572330352445e-05,
"loss": 0.1032,
"step": 4700
},
{
"epoch": 2.4986849026827986,
"grad_norm": 1.327062726020813,
"learning_rate": 1.000736454497633e-05,
"loss": 0.0962,
"step": 4750
},
{
"epoch": 2.524986849026828,
"grad_norm": 10.327136993408203,
"learning_rate": 9.90215675960021e-06,
"loss": 0.1092,
"step": 4800
},
{
"epoch": 2.5512887953708576,
"grad_norm": 3.8997962474823,
"learning_rate": 9.796948974224093e-06,
"loss": 0.0681,
"step": 4850
},
{
"epoch": 2.5775907417148867,
"grad_norm": 0.270841121673584,
"learning_rate": 9.691741188847975e-06,
"loss": 0.1265,
"step": 4900
},
{
"epoch": 2.6038926880589166,
"grad_norm": 0.8220506906509399,
"learning_rate": 9.586533403471858e-06,
"loss": 0.0726,
"step": 4950
},
{
"epoch": 2.6301946344029457,
"grad_norm": 1.4264813661575317,
"learning_rate": 9.48132561809574e-06,
"loss": 0.0707,
"step": 5000
},
{
"epoch": 2.656496580746975,
"grad_norm": 5.427404880523682,
"learning_rate": 9.376117832719622e-06,
"loss": 0.0762,
"step": 5050
},
{
"epoch": 2.6827985270910046,
"grad_norm": 39.103004455566406,
"learning_rate": 9.270910047343504e-06,
"loss": 0.0733,
"step": 5100
},
{
"epoch": 2.709100473435034,
"grad_norm": 2.8170275688171387,
"learning_rate": 9.165702261967387e-06,
"loss": 0.105,
"step": 5150
},
{
"epoch": 2.7354024197790636,
"grad_norm": 6.285243034362793,
"learning_rate": 9.060494476591268e-06,
"loss": 0.1054,
"step": 5200
},
{
"epoch": 2.761704366123093,
"grad_norm": 34.959102630615234,
"learning_rate": 8.95528669121515e-06,
"loss": 0.1168,
"step": 5250
},
{
"epoch": 2.7880063124671226,
"grad_norm": 2.698047399520874,
"learning_rate": 8.850078905839033e-06,
"loss": 0.0664,
"step": 5300
},
{
"epoch": 2.814308258811152,
"grad_norm": 6.107056617736816,
"learning_rate": 8.744871120462914e-06,
"loss": 0.0866,
"step": 5350
},
{
"epoch": 2.8406102051551816,
"grad_norm": 6.0492634773254395,
"learning_rate": 8.639663335086798e-06,
"loss": 0.0921,
"step": 5400
},
{
"epoch": 2.866912151499211,
"grad_norm": 38.75687789916992,
"learning_rate": 8.534455549710679e-06,
"loss": 0.0932,
"step": 5450
},
{
"epoch": 2.8932140978432406,
"grad_norm": 5.730583190917969,
"learning_rate": 8.429247764334562e-06,
"loss": 0.0809,
"step": 5500
},
{
"epoch": 2.9195160441872696,
"grad_norm": 0.2023005187511444,
"learning_rate": 8.324039978958444e-06,
"loss": 0.0723,
"step": 5550
},
{
"epoch": 2.9458179905312996,
"grad_norm": 24.816850662231445,
"learning_rate": 8.218832193582325e-06,
"loss": 0.0758,
"step": 5600
},
{
"epoch": 2.9721199368753286,
"grad_norm": 0.10021505504846573,
"learning_rate": 8.113624408206208e-06,
"loss": 0.0787,
"step": 5650
},
{
"epoch": 2.998421883219358,
"grad_norm": 3.8389430046081543,
"learning_rate": 8.00841662283009e-06,
"loss": 0.1321,
"step": 5700
},
{
"epoch": 3.0,
"eval_FPR_aeb_Arab": 0.004037685059788947,
"eval_FPR_arb_Arab": 0.009421265139507543,
"eval_FPR_ars_Arab": 0.005971392861273241,
"eval_FPR_arz_Arab": 0.020192307688424557,
"eval_accuracy": 0.9713195632153664,
"eval_loss": 0.15336963534355164,
"eval_macro_f1": 0.9569564393242584,
"eval_runtime": 3.3689,
"eval_samples_per_second": 2256.259,
"eval_steps_per_second": 35.324,
"step": 5703
},
{
"epoch": 3.0247238295633876,
"grad_norm": 0.30554988980293274,
"learning_rate": 7.903208837453971e-06,
"loss": 0.0937,
"step": 5750
},
{
"epoch": 3.051025775907417,
"grad_norm": 37.439884185791016,
"learning_rate": 7.798001052077856e-06,
"loss": 0.0578,
"step": 5800
},
{
"epoch": 3.0773277222514466,
"grad_norm": 0.0822492390871048,
"learning_rate": 7.692793266701737e-06,
"loss": 0.0636,
"step": 5850
},
{
"epoch": 3.103629668595476,
"grad_norm": 2.7918007373809814,
"learning_rate": 7.587585481325619e-06,
"loss": 0.0378,
"step": 5900
},
{
"epoch": 3.1299316149395056,
"grad_norm": 32.899818420410156,
"learning_rate": 7.482377695949501e-06,
"loss": 0.0609,
"step": 5950
},
{
"epoch": 3.156233561283535,
"grad_norm": 0.06830895692110062,
"learning_rate": 7.377169910573383e-06,
"loss": 0.0433,
"step": 6000
},
{
"epoch": 3.1825355076275645,
"grad_norm": 54.685489654541016,
"learning_rate": 7.271962125197265e-06,
"loss": 0.056,
"step": 6050
},
{
"epoch": 3.208837453971594,
"grad_norm": 0.8175523281097412,
"learning_rate": 7.166754339821147e-06,
"loss": 0.0341,
"step": 6100
},
{
"epoch": 3.2351394003156235,
"grad_norm": 0.33226722478866577,
"learning_rate": 7.061546554445029e-06,
"loss": 0.0482,
"step": 6150
},
{
"epoch": 3.2614413466596526,
"grad_norm": 1.425661325454712,
"learning_rate": 6.956338769068912e-06,
"loss": 0.0673,
"step": 6200
},
{
"epoch": 3.2877432930036825,
"grad_norm": 0.18895921111106873,
"learning_rate": 6.851130983692794e-06,
"loss": 0.0359,
"step": 6250
},
{
"epoch": 3.3140452393477116,
"grad_norm": 0.6557305455207825,
"learning_rate": 6.7459231983166766e-06,
"loss": 0.0382,
"step": 6300
},
{
"epoch": 3.340347185691741,
"grad_norm": 0.008198770694434643,
"learning_rate": 6.640715412940558e-06,
"loss": 0.0566,
"step": 6350
},
{
"epoch": 3.3666491320357705,
"grad_norm": 0.4695976674556732,
"learning_rate": 6.53550762756444e-06,
"loss": 0.0654,
"step": 6400
},
{
"epoch": 3.3929510783798,
"grad_norm": 8.628214836120605,
"learning_rate": 6.430299842188323e-06,
"loss": 0.0427,
"step": 6450
},
{
"epoch": 3.4192530247238295,
"grad_norm": 0.9650713801383972,
"learning_rate": 6.3250920568122044e-06,
"loss": 0.0645,
"step": 6500
},
{
"epoch": 3.445554971067859,
"grad_norm": 5.836668968200684,
"learning_rate": 6.219884271436086e-06,
"loss": 0.0397,
"step": 6550
},
{
"epoch": 3.4718569174118885,
"grad_norm": 0.03976545110344887,
"learning_rate": 6.11467648605997e-06,
"loss": 0.0586,
"step": 6600
},
{
"epoch": 3.498158863755918,
"grad_norm": 19.784215927124023,
"learning_rate": 6.009468700683851e-06,
"loss": 0.033,
"step": 6650
},
{
"epoch": 3.5244608100999475,
"grad_norm": 2.075496196746826,
"learning_rate": 5.904260915307733e-06,
"loss": 0.0776,
"step": 6700
},
{
"epoch": 3.550762756443977,
"grad_norm": 7.05810022354126,
"learning_rate": 5.799053129931616e-06,
"loss": 0.0905,
"step": 6750
},
{
"epoch": 3.5770647027880065,
"grad_norm": 0.012984913773834705,
"learning_rate": 5.6938453445554975e-06,
"loss": 0.0542,
"step": 6800
},
{
"epoch": 3.6033666491320355,
"grad_norm": 2.701481342315674,
"learning_rate": 5.588637559179379e-06,
"loss": 0.0625,
"step": 6850
},
{
"epoch": 3.6296685954760655,
"grad_norm": 0.41872379183769226,
"learning_rate": 5.483429773803262e-06,
"loss": 0.0795,
"step": 6900
},
{
"epoch": 3.6559705418200945,
"grad_norm": 0.13123294711112976,
"learning_rate": 5.378221988427144e-06,
"loss": 0.0296,
"step": 6950
},
{
"epoch": 3.682272488164124,
"grad_norm": 0.7190969586372375,
"learning_rate": 5.273014203051027e-06,
"loss": 0.0666,
"step": 7000
},
{
"epoch": 3.7085744345081535,
"grad_norm": 0.1744261384010315,
"learning_rate": 5.167806417674909e-06,
"loss": 0.0328,
"step": 7050
},
{
"epoch": 3.734876380852183,
"grad_norm": 0.5619340538978577,
"learning_rate": 5.062598632298791e-06,
"loss": 0.0755,
"step": 7100
},
{
"epoch": 3.7611783271962125,
"grad_norm": 40.665706634521484,
"learning_rate": 4.957390846922673e-06,
"loss": 0.1041,
"step": 7150
},
{
"epoch": 3.787480273540242,
"grad_norm": 0.06617475301027298,
"learning_rate": 4.852183061546555e-06,
"loss": 0.0264,
"step": 7200
},
{
"epoch": 3.8137822198842715,
"grad_norm": 5.0283966064453125,
"learning_rate": 4.746975276170437e-06,
"loss": 0.0789,
"step": 7250
},
{
"epoch": 3.840084166228301,
"grad_norm": 5.660898208618164,
"learning_rate": 4.641767490794319e-06,
"loss": 0.0582,
"step": 7300
},
{
"epoch": 3.8663861125723304,
"grad_norm": 0.8503484725952148,
"learning_rate": 4.536559705418201e-06,
"loss": 0.0862,
"step": 7350
},
{
"epoch": 3.89268805891636,
"grad_norm": 13.575056076049805,
"learning_rate": 4.431351920042084e-06,
"loss": 0.0554,
"step": 7400
},
{
"epoch": 3.9189900052603894,
"grad_norm": 0.25003504753112793,
"learning_rate": 4.3261441346659654e-06,
"loss": 0.0504,
"step": 7450
},
{
"epoch": 3.9452919516044185,
"grad_norm": 0.022247493267059326,
"learning_rate": 4.220936349289847e-06,
"loss": 0.0663,
"step": 7500
},
{
"epoch": 3.9715938979484484,
"grad_norm": 0.2591884136199951,
"learning_rate": 4.11572856391373e-06,
"loss": 0.0361,
"step": 7550
},
{
"epoch": 3.9978958442924775,
"grad_norm": 6.533713340759277,
"learning_rate": 4.010520778537612e-06,
"loss": 0.0293,
"step": 7600
},
{
"epoch": 4.0,
"eval_FPR_aeb_Arab": 0.004229955776921754,
"eval_FPR_arb_Arab": 0.011343972310835613,
"eval_FPR_ars_Arab": 0.00458269684702365,
"eval_FPR_arz_Arab": 0.015576923073927515,
"eval_accuracy": 0.9743454808577818,
"eval_loss": 0.15085552632808685,
"eval_macro_f1": 0.9633717243752477,
"eval_runtime": 3.3689,
"eval_samples_per_second": 2256.225,
"eval_steps_per_second": 35.323,
"step": 7604
}
],
"logging_steps": 50,
"max_steps": 9505,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3440682832634112.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}