{
"best_global_step": 5990,
"best_metric": 0.9394484894664766,
"best_model_checkpoint": "./results/run-0/checkpoint-5990",
"epoch": 9.98374322634431,
"eval_steps": 500,
"global_step": 5990,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16673614005835766,
"grad_norm": 2.8743319511413574,
"learning_rate": 6e-06,
"loss": 1.4727,
"step": 100
},
{
"epoch": 0.3334722801167153,
"grad_norm": 0.8747783899307251,
"learning_rate": 1.2e-05,
"loss": 0.617,
"step": 200
},
{
"epoch": 0.5002084201750729,
"grad_norm": 0.7884620428085327,
"learning_rate": 1.8e-05,
"loss": 0.4493,
"step": 300
},
{
"epoch": 0.6669445602334306,
"grad_norm": 0.8924545645713806,
"learning_rate": 2.4e-05,
"loss": 0.3876,
"step": 400
},
{
"epoch": 0.8336807002917882,
"grad_norm": 0.8031777739524841,
"learning_rate": 3e-05,
"loss": 0.3361,
"step": 500
},
{
"epoch": 1.0,
"grad_norm": 0.5999459624290466,
"learning_rate": 2.9453551912568308e-05,
"loss": 0.3004,
"step": 600
},
{
"epoch": 1.0,
"eval_administration_accuracy": 0.9140087554721701,
"eval_administration_f1": 0.8740363354524873,
"eval_corruption_accuracy": 0.9370439858244737,
"eval_corruption_f1": 0.9192595640701061,
"eval_democracy_accuracy": 0.9347508859703981,
"eval_democracy_f1": 0.9095054688749907,
"eval_development_accuracy": 0.8763810715030227,
"eval_development_f1": 0.8253993969919758,
"eval_economy_accuracy": 0.9010840108401084,
"eval_economy_f1": 0.8737219947600353,
"eval_education_accuracy": 0.9571607254534084,
"eval_education_f1": 0.9485581438064299,
"eval_environment_accuracy": 0.9734208880550344,
"eval_environment_f1": 0.9693817386774968,
"eval_instability_accuracy": 0.9219303731498854,
"eval_instability_f1": 0.8878400653107733,
"eval_leadership_accuracy": 0.7652699603919116,
"eval_leadership_f1": 0.7080044131422123,
"eval_loss": 0.2839118242263794,
"eval_overall_accuracy": 0.9120109790841496,
"eval_overall_f1": 0.8849771384459072,
"eval_race_accuracy": 0.9283927454659162,
"eval_race_f1": 0.907176591968042,
"eval_religion_accuracy": 0.9369397540129247,
"eval_religion_f1": 0.9217332965382043,
"eval_runtime": 10.9229,
"eval_safety_accuracy": 0.8977485928705441,
"eval_safety_f1": 0.8751086517581315,
"eval_samples_per_second": 878.336,
"eval_steps_per_second": 54.93,
"step": 600
},
{
"epoch": 1.1667361400583576,
"grad_norm": 0.6991692781448364,
"learning_rate": 2.890710382513661e-05,
"loss": 0.2667,
"step": 700
},
{
"epoch": 1.3334722801167154,
"grad_norm": 0.6808522343635559,
"learning_rate": 2.836065573770492e-05,
"loss": 0.2573,
"step": 800
},
{
"epoch": 1.500208420175073,
"grad_norm": 0.8924445509910583,
"learning_rate": 2.7814207650273226e-05,
"loss": 0.2447,
"step": 900
},
{
"epoch": 1.6669445602334307,
"grad_norm": 0.9563364386558533,
"learning_rate": 2.7267759562841533e-05,
"loss": 0.2282,
"step": 1000
},
{
"epoch": 1.8336807002917883,
"grad_norm": 0.8881794810295105,
"learning_rate": 2.6721311475409837e-05,
"loss": 0.2217,
"step": 1100
},
{
"epoch": 2.0,
"grad_norm": 1.2946265935897827,
"learning_rate": 2.6174863387978144e-05,
"loss": 0.2197,
"step": 1200
},
{
"epoch": 2.0,
"eval_administration_accuracy": 0.9364185949551803,
"eval_administration_f1": 0.9264664888238818,
"eval_corruption_accuracy": 0.9562226391494684,
"eval_corruption_f1": 0.9508457468931831,
"eval_democracy_accuracy": 0.947571398790911,
"eval_democracy_f1": 0.9355668234958175,
"eval_development_accuracy": 0.9049405878674172,
"eval_development_f1": 0.8818640965909518,
"eval_economy_accuracy": 0.924119241192412,
"eval_economy_f1": 0.9144295841480942,
"eval_education_accuracy": 0.9669585157390036,
"eval_education_f1": 0.9649722301773493,
"eval_environment_accuracy": 0.981029810298103,
"eval_environment_f1": 0.9786767163906938,
"eval_instability_accuracy": 0.9422555764019178,
"eval_instability_f1": 0.9320449284299851,
"eval_leadership_accuracy": 0.8006045445069835,
"eval_leadership_f1": 0.7682063698028097,
"eval_loss": 0.21895167231559753,
"eval_overall_accuracy": 0.9317629073726635,
"eval_overall_f1": 0.9213111296186804,
"eval_race_accuracy": 0.9516364394413175,
"eval_race_f1": 0.9462871699636376,
"eval_religion_accuracy": 0.9586199708150928,
"eval_religion_f1": 0.9568464833874664,
"eval_runtime": 10.9295,
"eval_safety_accuracy": 0.9107775693141547,
"eval_safety_f1": 0.8995269173202963,
"eval_samples_per_second": 877.809,
"eval_steps_per_second": 54.897,
"step": 1200
},
{
"epoch": 2.1667361400583576,
"grad_norm": 0.8773576021194458,
"learning_rate": 2.5628415300546447e-05,
"loss": 0.1841,
"step": 1300
},
{
"epoch": 2.333472280116715,
"grad_norm": 0.7541666030883789,
"learning_rate": 2.5081967213114754e-05,
"loss": 0.1757,
"step": 1400
},
{
"epoch": 2.500208420175073,
"grad_norm": 0.9500707983970642,
"learning_rate": 2.453551912568306e-05,
"loss": 0.1776,
"step": 1500
},
{
"epoch": 2.6669445602334307,
"grad_norm": 0.8599845170974731,
"learning_rate": 2.398907103825137e-05,
"loss": 0.1757,
"step": 1600
},
{
"epoch": 2.8336807002917883,
"grad_norm": 1.0130952596664429,
"learning_rate": 2.3442622950819672e-05,
"loss": 0.1675,
"step": 1700
},
{
"epoch": 3.0,
"grad_norm": 1.4208967685699463,
"learning_rate": 2.289617486338798e-05,
"loss": 0.1664,
"step": 1800
},
{
"epoch": 3.0,
"eval_administration_accuracy": 0.940379403794038,
"eval_administration_f1": 0.9332522249114288,
"eval_corruption_accuracy": 0.9609130706691682,
"eval_corruption_f1": 0.9591571276198037,
"eval_democracy_accuracy": 0.9572649572649573,
"eval_democracy_f1": 0.951957009924363,
"eval_development_accuracy": 0.9184907233687721,
"eval_development_f1": 0.909063933465515,
"eval_economy_accuracy": 0.9327704815509693,
"eval_economy_f1": 0.9291641275521808,
"eval_education_accuracy": 0.967479674796748,
"eval_education_f1": 0.965512166473417,
"eval_environment_accuracy": 0.9835313737752762,
"eval_environment_f1": 0.9820983135141137,
"eval_instability_accuracy": 0.9481967896602043,
"eval_instability_f1": 0.9444250756726008,
"eval_leadership_accuracy": 0.8285386700020846,
"eval_leadership_f1": 0.8147910628849035,
"eval_loss": 0.19577622413635254,
"eval_overall_accuracy": 0.9393283996942533,
"eval_overall_f1": 0.9346706078324559,
"eval_race_accuracy": 0.9551803210339795,
"eval_race_f1": 0.9521792365558912,
"eval_religion_accuracy": 0.9598707525536794,
"eval_religion_f1": 0.9582958411796174,
"eval_runtime": 10.979,
"eval_safety_accuracy": 0.9193245778611632,
"eval_safety_f1": 0.9161511742356354,
"eval_samples_per_second": 873.853,
"eval_steps_per_second": 54.65,
"step": 1800
},
{
"epoch": 3.1667361400583576,
"grad_norm": 0.7676725387573242,
"learning_rate": 2.2349726775956283e-05,
"loss": 0.137,
"step": 1900
},
{
"epoch": 3.333472280116715,
"grad_norm": 0.9001705050468445,
"learning_rate": 2.180327868852459e-05,
"loss": 0.1366,
"step": 2000
},
{
"epoch": 3.500208420175073,
"grad_norm": 0.9584451913833618,
"learning_rate": 2.1256830601092897e-05,
"loss": 0.1319,
"step": 2100
},
{
"epoch": 3.6669445602334307,
"grad_norm": 0.9352701306343079,
"learning_rate": 2.0710382513661204e-05,
"loss": 0.1324,
"step": 2200
},
{
"epoch": 3.8336807002917883,
"grad_norm": 0.8512120842933655,
"learning_rate": 2.0163934426229508e-05,
"loss": 0.1333,
"step": 2300
},
{
"epoch": 4.0,
"grad_norm": 0.8892608880996704,
"learning_rate": 1.9617486338797815e-05,
"loss": 0.1266,
"step": 2400
},
{
"epoch": 4.0,
"eval_administration_accuracy": 0.9418386491557224,
"eval_administration_f1": 0.9352052966447959,
"eval_corruption_accuracy": 0.9622680842193038,
"eval_corruption_f1": 0.9610755105054823,
"eval_democracy_accuracy": 0.9578903481342506,
"eval_democracy_f1": 0.9547168679700065,
"eval_development_accuracy": 0.9218261413383365,
"eval_development_f1": 0.9169350889919825,
"eval_economy_accuracy": 0.9337085678549093,
"eval_economy_f1": 0.9328708122199544,
"eval_education_accuracy": 0.9685219929122368,
"eval_education_f1": 0.9667154036014268,
"eval_environment_accuracy": 0.9815509693558474,
"eval_environment_f1": 0.9809518427248292,
"eval_instability_accuracy": 0.947571398790911,
"eval_instability_f1": 0.9441507785991868,
"eval_leadership_accuracy": 0.8358348968105066,
"eval_leadership_f1": 0.82702607509575,
"eval_loss": 0.1933656632900238,
"eval_overall_accuracy": 0.9413782919880481,
"eval_overall_f1": 0.9381715218630767,
"eval_race_accuracy": 0.9591411298728372,
"eval_race_f1": 0.9571038116721658,
"eval_religion_accuracy": 0.9621638524077548,
"eval_religion_f1": 0.9611025172696737,
"eval_runtime": 10.9996,
"eval_safety_accuracy": 0.9242234730039608,
"eval_safety_f1": 0.920204257061666,
"eval_samples_per_second": 872.211,
"eval_steps_per_second": 54.547,
"step": 2400
},
{
"epoch": 4.166736140058358,
"grad_norm": 0.6617388129234314,
"learning_rate": 1.907103825136612e-05,
"loss": 0.1039,
"step": 2500
},
{
"epoch": 4.333472280116715,
"grad_norm": 0.7575182914733887,
"learning_rate": 1.8524590163934426e-05,
"loss": 0.1105,
"step": 2600
},
{
"epoch": 4.500208420175073,
"grad_norm": 0.69528728723526,
"learning_rate": 1.7978142076502733e-05,
"loss": 0.1084,
"step": 2700
},
{
"epoch": 4.66694456023343,
"grad_norm": 0.9533807039260864,
"learning_rate": 1.743169398907104e-05,
"loss": 0.1051,
"step": 2800
},
{
"epoch": 4.833680700291788,
"grad_norm": 0.7591508030891418,
"learning_rate": 1.6885245901639344e-05,
"loss": 0.1008,
"step": 2900
},
{
"epoch": 5.0,
"grad_norm": 1.3957374095916748,
"learning_rate": 1.633879781420765e-05,
"loss": 0.1013,
"step": 3000
},
{
"epoch": 5.0,
"eval_administration_accuracy": 0.9406920992286846,
"eval_administration_f1": 0.935638525668697,
"eval_corruption_accuracy": 0.9626850114654992,
"eval_corruption_f1": 0.9611422434993832,
"eval_democracy_accuracy": 0.9496560350218887,
"eval_democracy_f1": 0.9499559979035322,
"eval_development_accuracy": 0.9201584323535543,
"eval_development_f1": 0.9155534426644083,
"eval_economy_accuracy": 0.9354805086512403,
"eval_economy_f1": 0.9325398706072688,
"eval_education_accuracy": 0.9697727746508235,
"eval_education_f1": 0.9681349456850368,
"eval_environment_accuracy": 0.9820721284135918,
"eval_environment_f1": 0.9811885272501003,
"eval_instability_accuracy": 0.9472587033562644,
"eval_instability_f1": 0.9442377846524443,
"eval_leadership_accuracy": 0.8364602876797999,
"eval_leadership_f1": 0.828026933450708,
"eval_loss": 0.19940470159053802,
"eval_overall_accuracy": 0.9405183795427697,
"eval_overall_f1": 0.9377003414537354,
"eval_race_accuracy": 0.9554930164686263,
"eval_race_f1": 0.9547403715439807,
"eval_religion_accuracy": 0.9633104023347926,
"eval_religion_f1": 0.9620625852940151,
"eval_runtime": 10.9306,
"eval_safety_accuracy": 0.923181154888472,
"eval_safety_f1": 0.9191828692252488,
"eval_samples_per_second": 877.722,
"eval_steps_per_second": 54.892,
"step": 3000
},
{
"epoch": 5.166736140058358,
"grad_norm": 0.5871491432189941,
"learning_rate": 1.5792349726775955e-05,
"loss": 0.0873,
"step": 3100
},
{
"epoch": 5.333472280116715,
"grad_norm": 0.8581504225730896,
"learning_rate": 1.5245901639344264e-05,
"loss": 0.0862,
"step": 3200
},
{
"epoch": 5.500208420175073,
"grad_norm": 0.9329476952552795,
"learning_rate": 1.4699453551912569e-05,
"loss": 0.0849,
"step": 3300
},
{
"epoch": 5.66694456023343,
"grad_norm": 0.8767560124397278,
"learning_rate": 1.4153005464480874e-05,
"loss": 0.088,
"step": 3400
},
{
"epoch": 5.833680700291788,
"grad_norm": 0.8050324320793152,
"learning_rate": 1.3606557377049181e-05,
"loss": 0.0826,
"step": 3500
},
{
"epoch": 6.0,
"grad_norm": 0.9876068830490112,
"learning_rate": 1.3060109289617487e-05,
"loss": 0.0811,
"step": 3600
},
{
"epoch": 6.0,
"eval_administration_accuracy": 0.9434021263289556,
"eval_administration_f1": 0.9387369505688391,
"eval_corruption_accuracy": 0.962997706900146,
"eval_corruption_f1": 0.9612502739505763,
"eval_democracy_accuracy": 0.9562226391494684,
"eval_democracy_f1": 0.954550595976141,
"eval_development_accuracy": 0.9203668959766521,
"eval_development_f1": 0.9144818227568411,
"eval_economy_accuracy": 0.9361058995205337,
"eval_economy_f1": 0.9313226219336369,
"eval_education_accuracy": 0.9694600792161768,
"eval_education_f1": 0.9675599527596068,
"eval_environment_accuracy": 0.9811340421096518,
"eval_environment_f1": 0.9803282887818994,
"eval_instability_accuracy": 0.9459036898061288,
"eval_instability_f1": 0.942472499866658,
"eval_leadership_accuracy": 0.8430268918073797,
"eval_leadership_f1": 0.8387770890259914,
"eval_loss": 0.20078489184379578,
"eval_overall_accuracy": 0.941647557501216,
"eval_overall_f1": 0.938780687918615,
"eval_race_accuracy": 0.9580988117573483,
"eval_race_f1": 0.9562411064132649,
"eval_religion_accuracy": 0.961955388784657,
"eval_religion_f1": 0.960819752774671,
"eval_runtime": 11.0615,
"eval_safety_accuracy": 0.9210965186574943,
"eval_safety_f1": 0.9188273002152547,
"eval_samples_per_second": 867.33,
"eval_steps_per_second": 54.242,
"step": 3600
},
{
"epoch": 6.166736140058358,
"grad_norm": 0.8045951724052429,
"learning_rate": 1.2513661202185792e-05,
"loss": 0.0701,
"step": 3700
},
{
"epoch": 6.333472280116715,
"grad_norm": 0.6562448143959045,
"learning_rate": 1.19672131147541e-05,
"loss": 0.0726,
"step": 3800
},
{
"epoch": 6.500208420175073,
"grad_norm": 0.7833376526832581,
"learning_rate": 1.1420765027322405e-05,
"loss": 0.0673,
"step": 3900
},
{
"epoch": 6.66694456023343,
"grad_norm": 0.8071606755256653,
"learning_rate": 1.087431693989071e-05,
"loss": 0.0733,
"step": 4000
},
{
"epoch": 6.833680700291788,
"grad_norm": 0.7821515798568726,
"learning_rate": 1.0327868852459017e-05,
"loss": 0.0678,
"step": 4100
},
{
"epoch": 7.0,
"grad_norm": 0.6441165804862976,
"learning_rate": 9.781420765027323e-06,
"loss": 0.0701,
"step": 4200
},
{
"epoch": 7.0,
"eval_administration_accuracy": 0.9396497811131957,
"eval_administration_f1": 0.9371884848945066,
"eval_corruption_accuracy": 0.9616426933500104,
"eval_corruption_f1": 0.9603149034436674,
"eval_democracy_accuracy": 0.9548676255993329,
"eval_democracy_f1": 0.9544642189136688,
"eval_development_accuracy": 0.919533041484261,
"eval_development_f1": 0.9174833579112073,
"eval_economy_accuracy": 0.9370439858244737,
"eval_economy_f1": 0.9350264441971636,
"eval_education_accuracy": 0.9670627475505524,
"eval_education_f1": 0.9657003505198045,
"eval_environment_accuracy": 0.9821763602251408,
"eval_environment_f1": 0.9808859370812918,
"eval_instability_accuracy": 0.9445486762559934,
"eval_instability_f1": 0.9432538618242539,
"eval_leadership_accuracy": 0.8441734417344173,
"eval_leadership_f1": 0.8426503273307849,
"eval_loss": 0.21108125150203705,
"eval_overall_accuracy": 0.9408397609617123,
"eval_overall_f1": 0.9393817348962156,
"eval_race_accuracy": 0.9566395663956639,
"eval_race_f1": 0.9559208826126899,
"eval_religion_accuracy": 0.9609130706691682,
"eval_religion_f1": 0.9608002073614549,
"eval_runtime": 10.9796,
"eval_safety_accuracy": 0.9218261413383365,
"eval_safety_f1": 0.9188918426640929,
"eval_samples_per_second": 873.8,
"eval_steps_per_second": 54.647,
"step": 4200
},
{
"epoch": 7.166736140058358,
"grad_norm": 0.6236246228218079,
"learning_rate": 9.234972677595628e-06,
"loss": 0.0597,
"step": 4300
},
{
"epoch": 7.333472280116715,
"grad_norm": 0.6468791365623474,
"learning_rate": 8.688524590163935e-06,
"loss": 0.0568,
"step": 4400
},
{
"epoch": 7.500208420175073,
"grad_norm": 0.7850766777992249,
"learning_rate": 8.14207650273224e-06,
"loss": 0.0586,
"step": 4500
},
{
"epoch": 7.66694456023343,
"grad_norm": 0.9113643169403076,
"learning_rate": 7.595628415300546e-06,
"loss": 0.0611,
"step": 4600
},
{
"epoch": 7.833680700291788,
"grad_norm": 0.9405547976493835,
"learning_rate": 7.049180327868853e-06,
"loss": 0.0636,
"step": 4700
},
{
"epoch": 8.0,
"grad_norm": 0.5414742827415466,
"learning_rate": 6.502732240437159e-06,
"loss": 0.0588,
"step": 4800
},
{
"epoch": 8.0,
"eval_administration_accuracy": 0.9413174900979779,
"eval_administration_f1": 0.9368824702703686,
"eval_corruption_accuracy": 0.9614342297269126,
"eval_corruption_f1": 0.9602768664022149,
"eval_democracy_accuracy": 0.9560141755263707,
"eval_democracy_f1": 0.9543585350199038,
"eval_development_accuracy": 0.9192203460496143,
"eval_development_f1": 0.9152605716450639,
"eval_economy_accuracy": 0.9381905357515113,
"eval_economy_f1": 0.935106524044037,
"eval_education_accuracy": 0.9688346883468835,
"eval_education_f1": 0.9673054511905975,
"eval_environment_accuracy": 0.9822805920366896,
"eval_environment_f1": 0.980929327817527,
"eval_instability_accuracy": 0.9455909943714822,
"eval_instability_f1": 0.9437005892876725,
"eval_leadership_accuracy": 0.8438607462997707,
"eval_leadership_f1": 0.8410810165791273,
"eval_loss": 0.215087428689003,
"eval_overall_accuracy": 0.9414651518310054,
"eval_overall_f1": 0.93924957060805,
"eval_race_accuracy": 0.9572649572649573,
"eval_race_f1": 0.9565627258185468,
"eval_religion_accuracy": 0.9615384615384616,
"eval_religion_f1": 0.9610239728595763,
"eval_runtime": 10.9675,
"eval_safety_accuracy": 0.9220346049614342,
"eval_safety_f1": 0.9185067963619639,
"eval_samples_per_second": 874.763,
"eval_steps_per_second": 54.707,
"step": 4800
},
{
"epoch": 8.166736140058358,
"grad_norm": 0.5899946093559265,
"learning_rate": 5.956284153005465e-06,
"loss": 0.0543,
"step": 4900
},
{
"epoch": 8.333472280116716,
"grad_norm": 0.5821051597595215,
"learning_rate": 5.409836065573771e-06,
"loss": 0.0528,
"step": 5000
},
{
"epoch": 8.500208420175072,
"grad_norm": 0.7321776151657104,
"learning_rate": 4.863387978142076e-06,
"loss": 0.0532,
"step": 5100
},
{
"epoch": 8.66694456023343,
"grad_norm": 0.7479367256164551,
"learning_rate": 4.316939890710383e-06,
"loss": 0.0519,
"step": 5200
},
{
"epoch": 8.833680700291788,
"grad_norm": 0.8334552049636841,
"learning_rate": 3.770491803278689e-06,
"loss": 0.0505,
"step": 5300
},
{
"epoch": 9.0,
"grad_norm": 0.7611594200134277,
"learning_rate": 3.2240437158469947e-06,
"loss": 0.0528,
"step": 5400
},
{
"epoch": 9.0,
"eval_administration_accuracy": 0.9398582447362935,
"eval_administration_f1": 0.9370835475227389,
"eval_corruption_accuracy": 0.9618511569731082,
"eval_corruption_f1": 0.9605413145422365,
"eval_democracy_accuracy": 0.9559099437148217,
"eval_democracy_f1": 0.9549404791061389,
"eval_development_accuracy": 0.9212007504690432,
"eval_development_f1": 0.9170777974760537,
"eval_economy_accuracy": 0.9381905357515113,
"eval_economy_f1": 0.93592555672295,
"eval_education_accuracy": 0.9682092974775902,
"eval_education_f1": 0.9667368725950756,
"eval_environment_accuracy": 0.9813425057327496,
"eval_environment_f1": 0.9802272421649649,
"eval_instability_accuracy": 0.9472587033562644,
"eval_instability_f1": 0.9448551688210497,
"eval_leadership_accuracy": 0.8438607462997707,
"eval_leadership_f1": 0.8412354371277554,
"eval_loss": 0.21861030161380768,
"eval_overall_accuracy": 0.9413609200194567,
"eval_overall_f1": 0.9394140341920821,
"eval_race_accuracy": 0.9572649572649573,
"eval_race_f1": 0.9563061857501846,
"eval_religion_accuracy": 0.961955388784657,
"eval_religion_f1": 0.9612659459831671,
"eval_runtime": 11.1696,
"eval_safety_accuracy": 0.9194288096727121,
"eval_safety_f1": 0.9167728624926694,
"eval_samples_per_second": 858.939,
"eval_steps_per_second": 53.717,
"step": 5400
},
{
"epoch": 9.166736140058358,
"grad_norm": 0.5734995007514954,
"learning_rate": 2.6775956284153005e-06,
"loss": 0.0483,
"step": 5500
},
{
"epoch": 9.333472280116716,
"grad_norm": 0.4083118736743927,
"learning_rate": 2.1311475409836063e-06,
"loss": 0.0491,
"step": 5600
},
{
"epoch": 9.500208420175072,
"grad_norm": 0.6585514545440674,
"learning_rate": 1.5846994535519126e-06,
"loss": 0.0488,
"step": 5700
},
{
"epoch": 9.66694456023343,
"grad_norm": 0.8815754055976868,
"learning_rate": 1.0382513661202186e-06,
"loss": 0.0485,
"step": 5800
},
{
"epoch": 9.833680700291788,
"grad_norm": 0.7910645008087158,
"learning_rate": 4.918032786885246e-07,
"loss": 0.0475,
"step": 5900
},
{
"epoch": 9.98374322634431,
"eval_administration_accuracy": 0.9398582447362935,
"eval_administration_f1": 0.9361942162273019,
"eval_corruption_accuracy": 0.9618511569731082,
"eval_corruption_f1": 0.9605486831469265,
"eval_democracy_accuracy": 0.9554930164686263,
"eval_democracy_f1": 0.9543821493687584,
"eval_development_accuracy": 0.9214092140921409,
"eval_development_f1": 0.9172870275429638,
"eval_economy_accuracy": 0.9368355222013759,
"eval_economy_f1": 0.9345244446666318,
"eval_education_accuracy": 0.9690431519699813,
"eval_education_f1": 0.9674369063891264,
"eval_environment_accuracy": 0.9807171148634564,
"eval_environment_f1": 0.9797111999641163,
"eval_instability_accuracy": 0.9462163852407754,
"eval_instability_f1": 0.9447235769030393,
"eval_leadership_accuracy": 0.8455284552845529,
"eval_leadership_f1": 0.8428711309576332,
"eval_loss": 0.22077877819538116,
"eval_overall_accuracy": 0.9413956639566394,
"eval_overall_f1": 0.9394484894664766,
"eval_race_accuracy": 0.9575776526996039,
"eval_race_f1": 0.9567385833513858,
"eval_religion_accuracy": 0.962059620596206,
"eval_religion_f1": 0.9614110923085984,
"eval_runtime": 11.0829,
"eval_safety_accuracy": 0.9201584323535543,
"eval_safety_f1": 0.9175528627712379,
"eval_samples_per_second": 865.657,
"eval_steps_per_second": 54.137,
"step": 5990
}
],
"logging_steps": 100,
"max_steps": 5990,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.932543652062822e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": {
"gradient_accumulation_steps": 4,
"learning_rate": 3e-05,
"num_train_epochs": 10
}
}