PoliBERT-MY / run-2 /checkpoint-8400 /trainer_state.json
YagiASAFAS's picture
Training in progress, epoch 1
0d87982 verified
{
"best_global_step": 7200,
"best_metric": 0.9362734744251768,
"best_model_checkpoint": "./results/run-2/checkpoint-7200",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 8400,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08336807002917883,
"grad_norm": 1.2878578901290894,
"learning_rate": 1.4e-05,
"loss": 1.2644,
"step": 100
},
{
"epoch": 0.16673614005835766,
"grad_norm": 1.0009047985076904,
"learning_rate": 2.8e-05,
"loss": 0.4758,
"step": 200
},
{
"epoch": 0.25010421008753647,
"grad_norm": 0.7635347843170166,
"learning_rate": 4.2e-05,
"loss": 0.4054,
"step": 300
},
{
"epoch": 0.3334722801167153,
"grad_norm": 0.8168604373931885,
"learning_rate": 5.6e-05,
"loss": 0.3468,
"step": 400
},
{
"epoch": 0.4168403501458941,
"grad_norm": 1.1069319248199463,
"learning_rate": 7e-05,
"loss": 0.3051,
"step": 500
},
{
"epoch": 0.5002084201750729,
"grad_norm": 1.015086054801941,
"learning_rate": 6.962534789124383e-05,
"loss": 0.3044,
"step": 600
},
{
"epoch": 0.5835764902042517,
"grad_norm": 0.8430781364440918,
"learning_rate": 6.925069578248769e-05,
"loss": 0.2742,
"step": 700
},
{
"epoch": 0.6669445602334306,
"grad_norm": 0.950409471988678,
"learning_rate": 6.887604367373152e-05,
"loss": 0.2573,
"step": 800
},
{
"epoch": 0.7503126302626094,
"grad_norm": 1.171885371208191,
"learning_rate": 6.850139156497538e-05,
"loss": 0.2517,
"step": 900
},
{
"epoch": 0.8336807002917882,
"grad_norm": 0.9133914709091187,
"learning_rate": 6.812673945621922e-05,
"loss": 0.2435,
"step": 1000
},
{
"epoch": 0.9170487703209671,
"grad_norm": 0.7939682602882385,
"learning_rate": 6.775208734746307e-05,
"loss": 0.2373,
"step": 1100
},
{
"epoch": 1.0,
"grad_norm": 0.49670639634132385,
"learning_rate": 6.737743523870691e-05,
"loss": 0.2363,
"step": 1200
},
{
"epoch": 1.0,
"eval_administration_accuracy": 0.9302689180737961,
"eval_administration_f1": 0.9077189587684091,
"eval_corruption_accuracy": 0.9534083802376485,
"eval_corruption_f1": 0.9474431988592255,
"eval_democracy_accuracy": 0.9473629351678132,
"eval_democracy_f1": 0.9358042521135516,
"eval_development_accuracy": 0.9094225557640192,
"eval_development_f1": 0.8937523747915405,
"eval_economy_accuracy": 0.924119241192412,
"eval_economy_f1": 0.9156575066932242,
"eval_education_accuracy": 0.9621638524077548,
"eval_education_f1": 0.9583809787976314,
"eval_environment_accuracy": 0.9775901605169898,
"eval_environment_f1": 0.9754739686963207,
"eval_instability_accuracy": 0.9422555764019178,
"eval_instability_f1": 0.9343996801103537,
"eval_leadership_accuracy": 0.7970606629143214,
"eval_leadership_f1": 0.7702375495381067,
"eval_loss": 0.22745274007320404,
"eval_overall_accuracy": 0.9296261552359112,
"eval_overall_f1": 0.9199542168750479,
"eval_race_accuracy": 0.9505941213258287,
"eval_race_f1": 0.9483610849847235,
"eval_religion_accuracy": 0.9505941213258287,
"eval_religion_f1": 0.9489643730683611,
"eval_runtime": 11.0331,
"eval_safety_accuracy": 0.9106733375026058,
"eval_safety_f1": 0.9032566760791263,
"eval_samples_per_second": 869.566,
"eval_steps_per_second": 54.382,
"step": 1200
},
{
"epoch": 1.0833680700291788,
"grad_norm": 0.949184238910675,
"learning_rate": 6.700278312995076e-05,
"loss": 0.1968,
"step": 1300
},
{
"epoch": 1.1667361400583576,
"grad_norm": 0.7735128402709961,
"learning_rate": 6.66281310211946e-05,
"loss": 0.1924,
"step": 1400
},
{
"epoch": 1.2501042100875366,
"grad_norm": 1.0187016725540161,
"learning_rate": 6.625347891243845e-05,
"loss": 0.1945,
"step": 1500
},
{
"epoch": 1.3334722801167154,
"grad_norm": 0.9707878828048706,
"learning_rate": 6.587882680368229e-05,
"loss": 0.1931,
"step": 1600
},
{
"epoch": 1.4168403501458942,
"grad_norm": 1.4602934122085571,
"learning_rate": 6.550417469492613e-05,
"loss": 0.1967,
"step": 1700
},
{
"epoch": 1.500208420175073,
"grad_norm": 1.2449183464050293,
"learning_rate": 6.512952258616998e-05,
"loss": 0.1843,
"step": 1800
},
{
"epoch": 1.5835764902042517,
"grad_norm": 1.4943019151687622,
"learning_rate": 6.475487047741382e-05,
"loss": 0.1844,
"step": 1900
},
{
"epoch": 1.6669445602334307,
"grad_norm": 1.8496220111846924,
"learning_rate": 6.438021836865767e-05,
"loss": 0.1797,
"step": 2000
},
{
"epoch": 1.7503126302626093,
"grad_norm": 1.0578210353851318,
"learning_rate": 6.400556625990151e-05,
"loss": 0.1875,
"step": 2100
},
{
"epoch": 1.8336807002917883,
"grad_norm": 1.0538480281829834,
"learning_rate": 6.363091415114536e-05,
"loss": 0.1685,
"step": 2200
},
{
"epoch": 1.917048770320967,
"grad_norm": 1.5351332426071167,
"learning_rate": 6.32562620423892e-05,
"loss": 0.1744,
"step": 2300
},
{
"epoch": 2.0,
"grad_norm": 0.9578828811645508,
"learning_rate": 6.288160993363306e-05,
"loss": 0.1811,
"step": 2400
},
{
"epoch": 2.0,
"eval_administration_accuracy": 0.9344381905357515,
"eval_administration_f1": 0.9296143144580357,
"eval_corruption_accuracy": 0.9604961434229727,
"eval_corruption_f1": 0.9577803781696625,
"eval_democracy_accuracy": 0.9534083802376485,
"eval_democracy_f1": 0.9434755362160818,
"eval_development_accuracy": 0.9193245778611632,
"eval_development_f1": 0.9132597892423039,
"eval_economy_accuracy": 0.931311236189285,
"eval_economy_f1": 0.92884213698996,
"eval_education_accuracy": 0.9653950385657702,
"eval_education_f1": 0.963862144635937,
"eval_environment_accuracy": 0.979153637690223,
"eval_environment_f1": 0.9783610154594226,
"eval_instability_accuracy": 0.9459036898061288,
"eval_instability_f1": 0.9423381513536221,
"eval_leadership_accuracy": 0.8387533875338753,
"eval_leadership_f1": 0.8323519207670947,
"eval_loss": 0.2031705528497696,
"eval_overall_accuracy": 0.9372177055103883,
"eval_overall_f1": 0.9339233416905627,
"eval_race_accuracy": 0.9516364394413175,
"eval_race_f1": 0.9498984068182696,
"eval_religion_accuracy": 0.9558057119032729,
"eval_religion_f1": 0.9554428854878803,
"eval_runtime": 11.0378,
"eval_safety_accuracy": 0.9109860329372524,
"eval_safety_f1": 0.9118534206884823,
"eval_samples_per_second": 869.197,
"eval_steps_per_second": 54.359,
"step": 2400
},
{
"epoch": 2.083368070029179,
"grad_norm": 1.0708891153335571,
"learning_rate": 6.25069578248769e-05,
"loss": 0.1338,
"step": 2500
},
{
"epoch": 2.1667361400583576,
"grad_norm": 1.0800749063491821,
"learning_rate": 6.213230571612073e-05,
"loss": 0.1268,
"step": 2600
},
{
"epoch": 2.2501042100875366,
"grad_norm": 1.3577998876571655,
"learning_rate": 6.175765360736459e-05,
"loss": 0.1226,
"step": 2700
},
{
"epoch": 2.333472280116715,
"grad_norm": 1.325534701347351,
"learning_rate": 6.138300149860843e-05,
"loss": 0.1258,
"step": 2800
},
{
"epoch": 2.416840350145894,
"grad_norm": 1.7846063375473022,
"learning_rate": 6.100834938985228e-05,
"loss": 0.1308,
"step": 2900
},
{
"epoch": 2.500208420175073,
"grad_norm": 1.61028254032135,
"learning_rate": 6.0633697281096124e-05,
"loss": 0.1305,
"step": 3000
},
{
"epoch": 2.5835764902042517,
"grad_norm": 1.2228693962097168,
"learning_rate": 6.025904517233996e-05,
"loss": 0.1315,
"step": 3100
},
{
"epoch": 2.6669445602334307,
"grad_norm": 0.9486532807350159,
"learning_rate": 5.988439306358381e-05,
"loss": 0.1337,
"step": 3200
},
{
"epoch": 2.7503126302626093,
"grad_norm": 1.3199305534362793,
"learning_rate": 5.9509740954827654e-05,
"loss": 0.132,
"step": 3300
},
{
"epoch": 2.8336807002917883,
"grad_norm": 1.8821613788604736,
"learning_rate": 5.91350888460715e-05,
"loss": 0.1258,
"step": 3400
},
{
"epoch": 2.9170487703209673,
"grad_norm": 1.2816129922866821,
"learning_rate": 5.8760436737315346e-05,
"loss": 0.1282,
"step": 3500
},
{
"epoch": 3.0,
"grad_norm": 5.737521171569824,
"learning_rate": 5.838578462855919e-05,
"loss": 0.1297,
"step": 3600
},
{
"epoch": 3.0,
"eval_administration_accuracy": 0.9375651448822181,
"eval_administration_f1": 0.9309689641816119,
"eval_corruption_accuracy": 0.9451740671252866,
"eval_corruption_f1": 0.9476001634812502,
"eval_democracy_accuracy": 0.9508025849489264,
"eval_democracy_f1": 0.9508783301626133,
"eval_development_accuracy": 0.921304982280592,
"eval_development_f1": 0.9145210502797096,
"eval_economy_accuracy": 0.930373149885345,
"eval_economy_f1": 0.9257602899924291,
"eval_education_accuracy": 0.9670627475505524,
"eval_education_f1": 0.9651765113114141,
"eval_environment_accuracy": 0.9805086512403586,
"eval_environment_f1": 0.9785021531425564,
"eval_instability_accuracy": 0.9419428809672712,
"eval_instability_f1": 0.9412425903038227,
"eval_leadership_accuracy": 0.8284344381905357,
"eval_leadership_f1": 0.8287745355631846,
"eval_loss": 0.21378232538700104,
"eval_overall_accuracy": 0.9360450976304634,
"eval_overall_f1": 0.9337781089602167,
"eval_race_accuracy": 0.9541380029184907,
"eval_race_f1": 0.9510004306979282,
"eval_religion_accuracy": 0.955701480091724,
"eval_religion_f1": 0.9539633431565642,
"eval_runtime": 11.1303,
"eval_safety_accuracy": 0.919533041484261,
"eval_safety_f1": 0.9169489452495161,
"eval_samples_per_second": 861.972,
"eval_steps_per_second": 53.907,
"step": 3600
},
{
"epoch": 3.083368070029179,
"grad_norm": 0.7353236079216003,
"learning_rate": 5.801113251980304e-05,
"loss": 0.0912,
"step": 3700
},
{
"epoch": 3.1667361400583576,
"grad_norm": 1.2753976583480835,
"learning_rate": 5.763648041104688e-05,
"loss": 0.0954,
"step": 3800
},
{
"epoch": 3.2501042100875366,
"grad_norm": 1.4709268808364868,
"learning_rate": 5.726182830229073e-05,
"loss": 0.0936,
"step": 3900
},
{
"epoch": 3.333472280116715,
"grad_norm": 0.8533086180686951,
"learning_rate": 5.6887176193534574e-05,
"loss": 0.0927,
"step": 4000
},
{
"epoch": 3.416840350145894,
"grad_norm": 1.1082295179367065,
"learning_rate": 5.6512524084778413e-05,
"loss": 0.091,
"step": 4100
},
{
"epoch": 3.500208420175073,
"grad_norm": 1.3943876028060913,
"learning_rate": 5.613787197602226e-05,
"loss": 0.0966,
"step": 4200
},
{
"epoch": 3.5835764902042517,
"grad_norm": 1.483786940574646,
"learning_rate": 5.5763219867266105e-05,
"loss": 0.0965,
"step": 4300
},
{
"epoch": 3.6669445602334307,
"grad_norm": 1.0739402770996094,
"learning_rate": 5.538856775850995e-05,
"loss": 0.0988,
"step": 4400
},
{
"epoch": 3.7503126302626093,
"grad_norm": 1.2540950775146484,
"learning_rate": 5.5013915649753796e-05,
"loss": 0.0921,
"step": 4500
},
{
"epoch": 3.8336807002917883,
"grad_norm": 1.5991489887237549,
"learning_rate": 5.463926354099764e-05,
"loss": 0.1044,
"step": 4600
},
{
"epoch": 3.9170487703209673,
"grad_norm": 1.71983003616333,
"learning_rate": 5.426461143224149e-05,
"loss": 0.0891,
"step": 4700
},
{
"epoch": 4.0,
"grad_norm": 0.6225905418395996,
"learning_rate": 5.3889959323485334e-05,
"loss": 0.0935,
"step": 4800
},
{
"epoch": 4.0,
"eval_administration_accuracy": 0.9401709401709402,
"eval_administration_f1": 0.9351367023709064,
"eval_corruption_accuracy": 0.9568480300187617,
"eval_corruption_f1": 0.9560031856416787,
"eval_democracy_accuracy": 0.9563268709610173,
"eval_democracy_f1": 0.9539147719955986,
"eval_development_accuracy": 0.9181780279341255,
"eval_development_f1": 0.9124572322058647,
"eval_economy_accuracy": 0.929435063581405,
"eval_economy_f1": 0.9284126365185199,
"eval_education_accuracy": 0.9672712111736502,
"eval_education_f1": 0.9657318355589747,
"eval_environment_accuracy": 0.9805086512403586,
"eval_environment_f1": 0.9793120114242748,
"eval_instability_accuracy": 0.9337085678549093,
"eval_instability_f1": 0.9360007265571828,
"eval_leadership_accuracy": 0.8413591828225975,
"eval_leadership_f1": 0.8351481808317307,
"eval_loss": 0.22106842696666718,
"eval_overall_accuracy": 0.9382079077201028,
"eval_overall_f1": 0.9358746589615409,
"eval_race_accuracy": 0.9561184073379195,
"eval_race_f1": 0.9543792719379139,
"eval_religion_accuracy": 0.9570564936418595,
"eval_religion_f1": 0.9560165149767178,
"eval_runtime": 10.9937,
"eval_safety_accuracy": 0.9215134459036898,
"eval_safety_f1": 0.9179828375191263,
"eval_samples_per_second": 872.679,
"eval_steps_per_second": 54.577,
"step": 4800
},
{
"epoch": 4.083368070029179,
"grad_norm": 0.6198378801345825,
"learning_rate": 5.351530721472918e-05,
"loss": 0.0658,
"step": 4900
},
{
"epoch": 4.166736140058358,
"grad_norm": 1.2718008756637573,
"learning_rate": 5.3140655105973025e-05,
"loss": 0.0636,
"step": 5000
},
{
"epoch": 4.250104210087536,
"grad_norm": 1.476962685585022,
"learning_rate": 5.2766002997216864e-05,
"loss": 0.0742,
"step": 5100
},
{
"epoch": 4.333472280116715,
"grad_norm": 1.1485812664031982,
"learning_rate": 5.239135088846071e-05,
"loss": 0.0693,
"step": 5200
},
{
"epoch": 4.416840350145894,
"grad_norm": 1.2767895460128784,
"learning_rate": 5.2016698779704556e-05,
"loss": 0.0722,
"step": 5300
},
{
"epoch": 4.500208420175073,
"grad_norm": 0.6865003108978271,
"learning_rate": 5.16420466709484e-05,
"loss": 0.0701,
"step": 5400
},
{
"epoch": 4.583576490204251,
"grad_norm": 0.6567270755767822,
"learning_rate": 5.127114108327981e-05,
"loss": 0.069,
"step": 5500
},
{
"epoch": 4.66694456023343,
"grad_norm": 1.1187883615493774,
"learning_rate": 5.0896488974523654e-05,
"loss": 0.0716,
"step": 5600
},
{
"epoch": 4.750312630262609,
"grad_norm": 1.3242968320846558,
"learning_rate": 5.05218368657675e-05,
"loss": 0.0702,
"step": 5700
},
{
"epoch": 4.833680700291788,
"grad_norm": 1.458292841911316,
"learning_rate": 5.0147184757011346e-05,
"loss": 0.07,
"step": 5800
},
{
"epoch": 4.917048770320967,
"grad_norm": 0.7626857757568359,
"learning_rate": 4.977253264825519e-05,
"loss": 0.0733,
"step": 5900
},
{
"epoch": 5.0,
"grad_norm": 0.7239888310432434,
"learning_rate": 4.939788053949904e-05,
"loss": 0.0675,
"step": 6000
},
{
"epoch": 5.0,
"eval_administration_accuracy": 0.9306858453199917,
"eval_administration_f1": 0.9286500450585223,
"eval_corruption_accuracy": 0.9606003752345216,
"eval_corruption_f1": 0.9583563972269709,
"eval_democracy_accuracy": 0.9416301855326246,
"eval_democracy_f1": 0.9449257041063754,
"eval_development_accuracy": 0.9198457369189077,
"eval_development_f1": 0.9154714581737773,
"eval_economy_accuracy": 0.9344381905357515,
"eval_economy_f1": 0.9306995601866777,
"eval_education_accuracy": 0.9673754429851991,
"eval_education_f1": 0.9648349073741065,
"eval_environment_accuracy": 0.9803001876172608,
"eval_environment_f1": 0.9792105415103222,
"eval_instability_accuracy": 0.944757139879091,
"eval_instability_f1": 0.9391731690067155,
"eval_leadership_accuracy": 0.8202001250781739,
"eval_leadership_f1": 0.8225964840638132,
"eval_loss": 0.2460281401872635,
"eval_overall_accuracy": 0.9360364116461678,
"eval_overall_f1": 0.9339890155373726,
"eval_race_accuracy": 0.9508025849489264,
"eval_race_f1": 0.9501627076296991,
"eval_religion_accuracy": 0.9597665207421305,
"eval_religion_f1": 0.9575416931106416,
"eval_runtime": 10.9751,
"eval_safety_accuracy": 0.9220346049614342,
"eval_safety_f1": 0.916245519000848,
"eval_samples_per_second": 874.157,
"eval_steps_per_second": 54.669,
"step": 6000
},
{
"epoch": 5.083368070029179,
"grad_norm": 1.2741373777389526,
"learning_rate": 4.902322843074288e-05,
"loss": 0.0551,
"step": 6100
},
{
"epoch": 5.166736140058358,
"grad_norm": 0.5817021727561951,
"learning_rate": 4.864857632198673e-05,
"loss": 0.0505,
"step": 6200
},
{
"epoch": 5.250104210087536,
"grad_norm": 1.0689315795898438,
"learning_rate": 4.827392421323057e-05,
"loss": 0.049,
"step": 6300
},
{
"epoch": 5.333472280116715,
"grad_norm": 1.5673719644546509,
"learning_rate": 4.789927210447441e-05,
"loss": 0.0554,
"step": 6400
},
{
"epoch": 5.416840350145894,
"grad_norm": 1.1274124383926392,
"learning_rate": 4.752461999571826e-05,
"loss": 0.0523,
"step": 6500
},
{
"epoch": 5.500208420175073,
"grad_norm": 0.836388349533081,
"learning_rate": 4.7149967886962105e-05,
"loss": 0.0516,
"step": 6600
},
{
"epoch": 5.583576490204251,
"grad_norm": 1.7012029886245728,
"learning_rate": 4.677531577820595e-05,
"loss": 0.0559,
"step": 6700
},
{
"epoch": 5.66694456023343,
"grad_norm": 1.0244345664978027,
"learning_rate": 4.6400663669449796e-05,
"loss": 0.0552,
"step": 6800
},
{
"epoch": 5.750312630262609,
"grad_norm": 1.2528122663497925,
"learning_rate": 4.602601156069364e-05,
"loss": 0.0508,
"step": 6900
},
{
"epoch": 5.833680700291788,
"grad_norm": 1.044662594795227,
"learning_rate": 4.565135945193749e-05,
"loss": 0.0531,
"step": 7000
},
{
"epoch": 5.917048770320967,
"grad_norm": 0.8813854455947876,
"learning_rate": 4.5276707343181333e-05,
"loss": 0.0482,
"step": 7100
},
{
"epoch": 6.0,
"grad_norm": 1.4556822776794434,
"learning_rate": 4.490205523442518e-05,
"loss": 0.0516,
"step": 7200
},
{
"epoch": 6.0,
"eval_administration_accuracy": 0.9356889722743381,
"eval_administration_f1": 0.9317820245044639,
"eval_corruption_accuracy": 0.9579945799457995,
"eval_corruption_f1": 0.9568343721711066,
"eval_democracy_accuracy": 0.9589326662497394,
"eval_democracy_f1": 0.95607216308818,
"eval_development_accuracy": 0.9168230143839899,
"eval_development_f1": 0.9143401221807914,
"eval_economy_accuracy": 0.9308943089430894,
"eval_economy_f1": 0.9294738147048666,
"eval_education_accuracy": 0.9669585157390036,
"eval_education_f1": 0.965464450546169,
"eval_environment_accuracy": 0.9786324786324786,
"eval_environment_f1": 0.977778980162068,
"eval_instability_accuracy": 0.9441317490097978,
"eval_instability_f1": 0.9395705042517813,
"eval_leadership_accuracy": 0.8412549510110485,
"eval_leadership_f1": 0.8395230039371011,
"eval_loss": 0.26496145129203796,
"eval_overall_accuracy": 0.938372941421722,
"eval_overall_f1": 0.9362734744251768,
"eval_race_accuracy": 0.9541380029184907,
"eval_race_f1": 0.9519598766506976,
"eval_religion_accuracy": 0.9579945799457995,
"eval_religion_f1": 0.9568944771373694,
"eval_runtime": 10.8844,
"eval_safety_accuracy": 0.9170314780070877,
"eval_safety_f1": 0.915587903767528,
"eval_samples_per_second": 881.445,
"eval_steps_per_second": 55.125,
"step": 7200
},
{
"epoch": 6.083368070029179,
"grad_norm": 0.7286815643310547,
"learning_rate": 4.452740312566902e-05,
"loss": 0.0366,
"step": 7300
},
{
"epoch": 6.166736140058358,
"grad_norm": 2.7514851093292236,
"learning_rate": 4.4152751016912864e-05,
"loss": 0.0392,
"step": 7400
},
{
"epoch": 6.250104210087536,
"grad_norm": 1.0808284282684326,
"learning_rate": 4.377809890815671e-05,
"loss": 0.0365,
"step": 7500
},
{
"epoch": 6.333472280116715,
"grad_norm": 0.5677124261856079,
"learning_rate": 4.3403446799400555e-05,
"loss": 0.0439,
"step": 7600
},
{
"epoch": 6.416840350145894,
"grad_norm": 0.7869217991828918,
"learning_rate": 4.30287946906444e-05,
"loss": 0.034,
"step": 7700
},
{
"epoch": 6.500208420175073,
"grad_norm": 0.6197104454040527,
"learning_rate": 4.265414258188825e-05,
"loss": 0.0375,
"step": 7800
},
{
"epoch": 6.583576490204251,
"grad_norm": 1.126968264579773,
"learning_rate": 4.228323699421965e-05,
"loss": 0.0437,
"step": 7900
},
{
"epoch": 6.66694456023343,
"grad_norm": 0.6215279698371887,
"learning_rate": 4.190858488546349e-05,
"loss": 0.0413,
"step": 8000
},
{
"epoch": 6.750312630262609,
"grad_norm": 1.0289812088012695,
"learning_rate": 4.153393277670734e-05,
"loss": 0.0383,
"step": 8100
},
{
"epoch": 6.833680700291788,
"grad_norm": 1.2618753910064697,
"learning_rate": 4.1159280667951184e-05,
"loss": 0.037,
"step": 8200
},
{
"epoch": 6.917048770320967,
"grad_norm": 0.48271915316581726,
"learning_rate": 4.078462855919502e-05,
"loss": 0.0403,
"step": 8300
},
{
"epoch": 7.0,
"grad_norm": 1.4230297803878784,
"learning_rate": 4.040997645043887e-05,
"loss": 0.0421,
"step": 8400
},
{
"epoch": 7.0,
"eval_administration_accuracy": 0.9262038774233896,
"eval_administration_f1": 0.9271433160043757,
"eval_corruption_accuracy": 0.9570564936418595,
"eval_corruption_f1": 0.9567097147624816,
"eval_democracy_accuracy": 0.9531999166145507,
"eval_democracy_f1": 0.9521445432226068,
"eval_development_accuracy": 0.9057744423598082,
"eval_development_f1": 0.9056708555901262,
"eval_economy_accuracy": 0.9282885136543673,
"eval_economy_f1": 0.926345436006994,
"eval_education_accuracy": 0.962059620596206,
"eval_education_f1": 0.961835890625152,
"eval_environment_accuracy": 0.9805086512403586,
"eval_environment_f1": 0.9790877092918502,
"eval_instability_accuracy": 0.9382947675630603,
"eval_instability_f1": 0.9376909954205174,
"eval_leadership_accuracy": 0.8362518240567021,
"eval_leadership_f1": 0.8378707787677608,
"eval_loss": 0.2829027473926544,
"eval_overall_accuracy": 0.9348638037662428,
"eval_overall_f1": 0.9341947646002294,
"eval_race_accuracy": 0.9518449030644153,
"eval_race_f1": 0.9506769394120873,
"eval_religion_accuracy": 0.9585157390035439,
"eval_religion_f1": 0.9579338409216822,
"eval_runtime": 10.9473,
"eval_safety_accuracy": 0.9203668959766521,
"eval_safety_f1": 0.917227155177119,
"eval_samples_per_second": 876.38,
"eval_steps_per_second": 54.808,
"step": 8400
}
],
"logging_steps": 100,
"max_steps": 19184,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 7.070187082265395e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": {
"gradient_accumulation_steps": 2,
"learning_rate": 7e-05,
"num_train_epochs": 16
}
}