| { |
| "best_global_step": 7200, |
| "best_metric": 0.9362734744251768, |
| "best_model_checkpoint": "./results/run-2/checkpoint-7200", |
| "epoch": 7.0, |
| "eval_steps": 500, |
| "global_step": 8400, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08336807002917883, |
| "grad_norm": 1.2878578901290894, |
| "learning_rate": 1.4e-05, |
| "loss": 1.2644, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 1.0009047985076904, |
| "learning_rate": 2.8e-05, |
| "loss": 0.4758, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25010421008753647, |
| "grad_norm": 0.7635347843170166, |
| "learning_rate": 4.2e-05, |
| "loss": 0.4054, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 0.8168604373931885, |
| "learning_rate": 5.6e-05, |
| "loss": 0.3468, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.4168403501458941, |
| "grad_norm": 1.1069319248199463, |
| "learning_rate": 7e-05, |
| "loss": 0.3051, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 1.015086054801941, |
| "learning_rate": 6.962534789124383e-05, |
| "loss": 0.3044, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5835764902042517, |
| "grad_norm": 0.8430781364440918, |
| "learning_rate": 6.925069578248769e-05, |
| "loss": 0.2742, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 0.950409471988678, |
| "learning_rate": 6.887604367373152e-05, |
| "loss": 0.2573, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7503126302626094, |
| "grad_norm": 1.171885371208191, |
| "learning_rate": 6.850139156497538e-05, |
| "loss": 0.2517, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 0.9133914709091187, |
| "learning_rate": 6.812673945621922e-05, |
| "loss": 0.2435, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.9170487703209671, |
| "grad_norm": 0.7939682602882385, |
| "learning_rate": 6.775208734746307e-05, |
| "loss": 0.2373, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.49670639634132385, |
| "learning_rate": 6.737743523870691e-05, |
| "loss": 0.2363, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9302689180737961, |
| "eval_administration_f1": 0.9077189587684091, |
| "eval_corruption_accuracy": 0.9534083802376485, |
| "eval_corruption_f1": 0.9474431988592255, |
| "eval_democracy_accuracy": 0.9473629351678132, |
| "eval_democracy_f1": 0.9358042521135516, |
| "eval_development_accuracy": 0.9094225557640192, |
| "eval_development_f1": 0.8937523747915405, |
| "eval_economy_accuracy": 0.924119241192412, |
| "eval_economy_f1": 0.9156575066932242, |
| "eval_education_accuracy": 0.9621638524077548, |
| "eval_education_f1": 0.9583809787976314, |
| "eval_environment_accuracy": 0.9775901605169898, |
| "eval_environment_f1": 0.9754739686963207, |
| "eval_instability_accuracy": 0.9422555764019178, |
| "eval_instability_f1": 0.9343996801103537, |
| "eval_leadership_accuracy": 0.7970606629143214, |
| "eval_leadership_f1": 0.7702375495381067, |
| "eval_loss": 0.22745274007320404, |
| "eval_overall_accuracy": 0.9296261552359112, |
| "eval_overall_f1": 0.9199542168750479, |
| "eval_race_accuracy": 0.9505941213258287, |
| "eval_race_f1": 0.9483610849847235, |
| "eval_religion_accuracy": 0.9505941213258287, |
| "eval_religion_f1": 0.9489643730683611, |
| "eval_runtime": 11.0331, |
| "eval_safety_accuracy": 0.9106733375026058, |
| "eval_safety_f1": 0.9032566760791263, |
| "eval_samples_per_second": 869.566, |
| "eval_steps_per_second": 54.382, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0833680700291788, |
| "grad_norm": 0.949184238910675, |
| "learning_rate": 6.700278312995076e-05, |
| "loss": 0.1968, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.1667361400583576, |
| "grad_norm": 0.7735128402709961, |
| "learning_rate": 6.66281310211946e-05, |
| "loss": 0.1924, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.2501042100875366, |
| "grad_norm": 1.0187016725540161, |
| "learning_rate": 6.625347891243845e-05, |
| "loss": 0.1945, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3334722801167154, |
| "grad_norm": 0.9707878828048706, |
| "learning_rate": 6.587882680368229e-05, |
| "loss": 0.1931, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.4168403501458942, |
| "grad_norm": 1.4602934122085571, |
| "learning_rate": 6.550417469492613e-05, |
| "loss": 0.1967, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.500208420175073, |
| "grad_norm": 1.2449183464050293, |
| "learning_rate": 6.512952258616998e-05, |
| "loss": 0.1843, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.5835764902042517, |
| "grad_norm": 1.4943019151687622, |
| "learning_rate": 6.475487047741382e-05, |
| "loss": 0.1844, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6669445602334307, |
| "grad_norm": 1.8496220111846924, |
| "learning_rate": 6.438021836865767e-05, |
| "loss": 0.1797, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.7503126302626093, |
| "grad_norm": 1.0578210353851318, |
| "learning_rate": 6.400556625990151e-05, |
| "loss": 0.1875, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.8336807002917883, |
| "grad_norm": 1.0538480281829834, |
| "learning_rate": 6.363091415114536e-05, |
| "loss": 0.1685, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.917048770320967, |
| "grad_norm": 1.5351332426071167, |
| "learning_rate": 6.32562620423892e-05, |
| "loss": 0.1744, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9578828811645508, |
| "learning_rate": 6.288160993363306e-05, |
| "loss": 0.1811, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9344381905357515, |
| "eval_administration_f1": 0.9296143144580357, |
| "eval_corruption_accuracy": 0.9604961434229727, |
| "eval_corruption_f1": 0.9577803781696625, |
| "eval_democracy_accuracy": 0.9534083802376485, |
| "eval_democracy_f1": 0.9434755362160818, |
| "eval_development_accuracy": 0.9193245778611632, |
| "eval_development_f1": 0.9132597892423039, |
| "eval_economy_accuracy": 0.931311236189285, |
| "eval_economy_f1": 0.92884213698996, |
| "eval_education_accuracy": 0.9653950385657702, |
| "eval_education_f1": 0.963862144635937, |
| "eval_environment_accuracy": 0.979153637690223, |
| "eval_environment_f1": 0.9783610154594226, |
| "eval_instability_accuracy": 0.9459036898061288, |
| "eval_instability_f1": 0.9423381513536221, |
| "eval_leadership_accuracy": 0.8387533875338753, |
| "eval_leadership_f1": 0.8323519207670947, |
| "eval_loss": 0.2031705528497696, |
| "eval_overall_accuracy": 0.9372177055103883, |
| "eval_overall_f1": 0.9339233416905627, |
| "eval_race_accuracy": 0.9516364394413175, |
| "eval_race_f1": 0.9498984068182696, |
| "eval_religion_accuracy": 0.9558057119032729, |
| "eval_religion_f1": 0.9554428854878803, |
| "eval_runtime": 11.0378, |
| "eval_safety_accuracy": 0.9109860329372524, |
| "eval_safety_f1": 0.9118534206884823, |
| "eval_samples_per_second": 869.197, |
| "eval_steps_per_second": 54.359, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.083368070029179, |
| "grad_norm": 1.0708891153335571, |
| "learning_rate": 6.25069578248769e-05, |
| "loss": 0.1338, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.1667361400583576, |
| "grad_norm": 1.0800749063491821, |
| "learning_rate": 6.213230571612073e-05, |
| "loss": 0.1268, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.2501042100875366, |
| "grad_norm": 1.3577998876571655, |
| "learning_rate": 6.175765360736459e-05, |
| "loss": 0.1226, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.333472280116715, |
| "grad_norm": 1.325534701347351, |
| "learning_rate": 6.138300149860843e-05, |
| "loss": 0.1258, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.416840350145894, |
| "grad_norm": 1.7846063375473022, |
| "learning_rate": 6.100834938985228e-05, |
| "loss": 0.1308, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.500208420175073, |
| "grad_norm": 1.61028254032135, |
| "learning_rate": 6.0633697281096124e-05, |
| "loss": 0.1305, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.5835764902042517, |
| "grad_norm": 1.2228693962097168, |
| "learning_rate": 6.025904517233996e-05, |
| "loss": 0.1315, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.6669445602334307, |
| "grad_norm": 0.9486532807350159, |
| "learning_rate": 5.988439306358381e-05, |
| "loss": 0.1337, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.7503126302626093, |
| "grad_norm": 1.3199305534362793, |
| "learning_rate": 5.9509740954827654e-05, |
| "loss": 0.132, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.8336807002917883, |
| "grad_norm": 1.8821613788604736, |
| "learning_rate": 5.91350888460715e-05, |
| "loss": 0.1258, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.9170487703209673, |
| "grad_norm": 1.2816129922866821, |
| "learning_rate": 5.8760436737315346e-05, |
| "loss": 0.1282, |
| "step": 3500 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 5.737521171569824, |
| "learning_rate": 5.838578462855919e-05, |
| "loss": 0.1297, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_administration_accuracy": 0.9375651448822181, |
| "eval_administration_f1": 0.9309689641816119, |
| "eval_corruption_accuracy": 0.9451740671252866, |
| "eval_corruption_f1": 0.9476001634812502, |
| "eval_democracy_accuracy": 0.9508025849489264, |
| "eval_democracy_f1": 0.9508783301626133, |
| "eval_development_accuracy": 0.921304982280592, |
| "eval_development_f1": 0.9145210502797096, |
| "eval_economy_accuracy": 0.930373149885345, |
| "eval_economy_f1": 0.9257602899924291, |
| "eval_education_accuracy": 0.9670627475505524, |
| "eval_education_f1": 0.9651765113114141, |
| "eval_environment_accuracy": 0.9805086512403586, |
| "eval_environment_f1": 0.9785021531425564, |
| "eval_instability_accuracy": 0.9419428809672712, |
| "eval_instability_f1": 0.9412425903038227, |
| "eval_leadership_accuracy": 0.8284344381905357, |
| "eval_leadership_f1": 0.8287745355631846, |
| "eval_loss": 0.21378232538700104, |
| "eval_overall_accuracy": 0.9360450976304634, |
| "eval_overall_f1": 0.9337781089602167, |
| "eval_race_accuracy": 0.9541380029184907, |
| "eval_race_f1": 0.9510004306979282, |
| "eval_religion_accuracy": 0.955701480091724, |
| "eval_religion_f1": 0.9539633431565642, |
| "eval_runtime": 11.1303, |
| "eval_safety_accuracy": 0.919533041484261, |
| "eval_safety_f1": 0.9169489452495161, |
| "eval_samples_per_second": 861.972, |
| "eval_steps_per_second": 53.907, |
| "step": 3600 |
| }, |
| { |
| "epoch": 3.083368070029179, |
| "grad_norm": 0.7353236079216003, |
| "learning_rate": 5.801113251980304e-05, |
| "loss": 0.0912, |
| "step": 3700 |
| }, |
| { |
| "epoch": 3.1667361400583576, |
| "grad_norm": 1.2753976583480835, |
| "learning_rate": 5.763648041104688e-05, |
| "loss": 0.0954, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.2501042100875366, |
| "grad_norm": 1.4709268808364868, |
| "learning_rate": 5.726182830229073e-05, |
| "loss": 0.0936, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.333472280116715, |
| "grad_norm": 0.8533086180686951, |
| "learning_rate": 5.6887176193534574e-05, |
| "loss": 0.0927, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.416840350145894, |
| "grad_norm": 1.1082295179367065, |
| "learning_rate": 5.6512524084778413e-05, |
| "loss": 0.091, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.500208420175073, |
| "grad_norm": 1.3943876028060913, |
| "learning_rate": 5.613787197602226e-05, |
| "loss": 0.0966, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.5835764902042517, |
| "grad_norm": 1.483786940574646, |
| "learning_rate": 5.5763219867266105e-05, |
| "loss": 0.0965, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.6669445602334307, |
| "grad_norm": 1.0739402770996094, |
| "learning_rate": 5.538856775850995e-05, |
| "loss": 0.0988, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.7503126302626093, |
| "grad_norm": 1.2540950775146484, |
| "learning_rate": 5.5013915649753796e-05, |
| "loss": 0.0921, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.8336807002917883, |
| "grad_norm": 1.5991489887237549, |
| "learning_rate": 5.463926354099764e-05, |
| "loss": 0.1044, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.9170487703209673, |
| "grad_norm": 1.71983003616333, |
| "learning_rate": 5.426461143224149e-05, |
| "loss": 0.0891, |
| "step": 4700 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.6225905418395996, |
| "learning_rate": 5.3889959323485334e-05, |
| "loss": 0.0935, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_administration_accuracy": 0.9401709401709402, |
| "eval_administration_f1": 0.9351367023709064, |
| "eval_corruption_accuracy": 0.9568480300187617, |
| "eval_corruption_f1": 0.9560031856416787, |
| "eval_democracy_accuracy": 0.9563268709610173, |
| "eval_democracy_f1": 0.9539147719955986, |
| "eval_development_accuracy": 0.9181780279341255, |
| "eval_development_f1": 0.9124572322058647, |
| "eval_economy_accuracy": 0.929435063581405, |
| "eval_economy_f1": 0.9284126365185199, |
| "eval_education_accuracy": 0.9672712111736502, |
| "eval_education_f1": 0.9657318355589747, |
| "eval_environment_accuracy": 0.9805086512403586, |
| "eval_environment_f1": 0.9793120114242748, |
| "eval_instability_accuracy": 0.9337085678549093, |
| "eval_instability_f1": 0.9360007265571828, |
| "eval_leadership_accuracy": 0.8413591828225975, |
| "eval_leadership_f1": 0.8351481808317307, |
| "eval_loss": 0.22106842696666718, |
| "eval_overall_accuracy": 0.9382079077201028, |
| "eval_overall_f1": 0.9358746589615409, |
| "eval_race_accuracy": 0.9561184073379195, |
| "eval_race_f1": 0.9543792719379139, |
| "eval_religion_accuracy": 0.9570564936418595, |
| "eval_religion_f1": 0.9560165149767178, |
| "eval_runtime": 10.9937, |
| "eval_safety_accuracy": 0.9215134459036898, |
| "eval_safety_f1": 0.9179828375191263, |
| "eval_samples_per_second": 872.679, |
| "eval_steps_per_second": 54.577, |
| "step": 4800 |
| }, |
| { |
| "epoch": 4.083368070029179, |
| "grad_norm": 0.6198378801345825, |
| "learning_rate": 5.351530721472918e-05, |
| "loss": 0.0658, |
| "step": 4900 |
| }, |
| { |
| "epoch": 4.166736140058358, |
| "grad_norm": 1.2718008756637573, |
| "learning_rate": 5.3140655105973025e-05, |
| "loss": 0.0636, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.250104210087536, |
| "grad_norm": 1.476962685585022, |
| "learning_rate": 5.2766002997216864e-05, |
| "loss": 0.0742, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.333472280116715, |
| "grad_norm": 1.1485812664031982, |
| "learning_rate": 5.239135088846071e-05, |
| "loss": 0.0693, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.416840350145894, |
| "grad_norm": 1.2767895460128784, |
| "learning_rate": 5.2016698779704556e-05, |
| "loss": 0.0722, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.500208420175073, |
| "grad_norm": 0.6865003108978271, |
| "learning_rate": 5.16420466709484e-05, |
| "loss": 0.0701, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.583576490204251, |
| "grad_norm": 0.6567270755767822, |
| "learning_rate": 5.127114108327981e-05, |
| "loss": 0.069, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.66694456023343, |
| "grad_norm": 1.1187883615493774, |
| "learning_rate": 5.0896488974523654e-05, |
| "loss": 0.0716, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.750312630262609, |
| "grad_norm": 1.3242968320846558, |
| "learning_rate": 5.05218368657675e-05, |
| "loss": 0.0702, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.833680700291788, |
| "grad_norm": 1.458292841911316, |
| "learning_rate": 5.0147184757011346e-05, |
| "loss": 0.07, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.917048770320967, |
| "grad_norm": 0.7626857757568359, |
| "learning_rate": 4.977253264825519e-05, |
| "loss": 0.0733, |
| "step": 5900 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.7239888310432434, |
| "learning_rate": 4.939788053949904e-05, |
| "loss": 0.0675, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_administration_accuracy": 0.9306858453199917, |
| "eval_administration_f1": 0.9286500450585223, |
| "eval_corruption_accuracy": 0.9606003752345216, |
| "eval_corruption_f1": 0.9583563972269709, |
| "eval_democracy_accuracy": 0.9416301855326246, |
| "eval_democracy_f1": 0.9449257041063754, |
| "eval_development_accuracy": 0.9198457369189077, |
| "eval_development_f1": 0.9154714581737773, |
| "eval_economy_accuracy": 0.9344381905357515, |
| "eval_economy_f1": 0.9306995601866777, |
| "eval_education_accuracy": 0.9673754429851991, |
| "eval_education_f1": 0.9648349073741065, |
| "eval_environment_accuracy": 0.9803001876172608, |
| "eval_environment_f1": 0.9792105415103222, |
| "eval_instability_accuracy": 0.944757139879091, |
| "eval_instability_f1": 0.9391731690067155, |
| "eval_leadership_accuracy": 0.8202001250781739, |
| "eval_leadership_f1": 0.8225964840638132, |
| "eval_loss": 0.2460281401872635, |
| "eval_overall_accuracy": 0.9360364116461678, |
| "eval_overall_f1": 0.9339890155373726, |
| "eval_race_accuracy": 0.9508025849489264, |
| "eval_race_f1": 0.9501627076296991, |
| "eval_religion_accuracy": 0.9597665207421305, |
| "eval_religion_f1": 0.9575416931106416, |
| "eval_runtime": 10.9751, |
| "eval_safety_accuracy": 0.9220346049614342, |
| "eval_safety_f1": 0.916245519000848, |
| "eval_samples_per_second": 874.157, |
| "eval_steps_per_second": 54.669, |
| "step": 6000 |
| }, |
| { |
| "epoch": 5.083368070029179, |
| "grad_norm": 1.2741373777389526, |
| "learning_rate": 4.902322843074288e-05, |
| "loss": 0.0551, |
| "step": 6100 |
| }, |
| { |
| "epoch": 5.166736140058358, |
| "grad_norm": 0.5817021727561951, |
| "learning_rate": 4.864857632198673e-05, |
| "loss": 0.0505, |
| "step": 6200 |
| }, |
| { |
| "epoch": 5.250104210087536, |
| "grad_norm": 1.0689315795898438, |
| "learning_rate": 4.827392421323057e-05, |
| "loss": 0.049, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.333472280116715, |
| "grad_norm": 1.5673719644546509, |
| "learning_rate": 4.789927210447441e-05, |
| "loss": 0.0554, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.416840350145894, |
| "grad_norm": 1.1274124383926392, |
| "learning_rate": 4.752461999571826e-05, |
| "loss": 0.0523, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.500208420175073, |
| "grad_norm": 0.836388349533081, |
| "learning_rate": 4.7149967886962105e-05, |
| "loss": 0.0516, |
| "step": 6600 |
| }, |
| { |
| "epoch": 5.583576490204251, |
| "grad_norm": 1.7012029886245728, |
| "learning_rate": 4.677531577820595e-05, |
| "loss": 0.0559, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.66694456023343, |
| "grad_norm": 1.0244345664978027, |
| "learning_rate": 4.6400663669449796e-05, |
| "loss": 0.0552, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.750312630262609, |
| "grad_norm": 1.2528122663497925, |
| "learning_rate": 4.602601156069364e-05, |
| "loss": 0.0508, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.833680700291788, |
| "grad_norm": 1.044662594795227, |
| "learning_rate": 4.565135945193749e-05, |
| "loss": 0.0531, |
| "step": 7000 |
| }, |
| { |
| "epoch": 5.917048770320967, |
| "grad_norm": 0.8813854455947876, |
| "learning_rate": 4.5276707343181333e-05, |
| "loss": 0.0482, |
| "step": 7100 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 1.4556822776794434, |
| "learning_rate": 4.490205523442518e-05, |
| "loss": 0.0516, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_administration_accuracy": 0.9356889722743381, |
| "eval_administration_f1": 0.9317820245044639, |
| "eval_corruption_accuracy": 0.9579945799457995, |
| "eval_corruption_f1": 0.9568343721711066, |
| "eval_democracy_accuracy": 0.9589326662497394, |
| "eval_democracy_f1": 0.95607216308818, |
| "eval_development_accuracy": 0.9168230143839899, |
| "eval_development_f1": 0.9143401221807914, |
| "eval_economy_accuracy": 0.9308943089430894, |
| "eval_economy_f1": 0.9294738147048666, |
| "eval_education_accuracy": 0.9669585157390036, |
| "eval_education_f1": 0.965464450546169, |
| "eval_environment_accuracy": 0.9786324786324786, |
| "eval_environment_f1": 0.977778980162068, |
| "eval_instability_accuracy": 0.9441317490097978, |
| "eval_instability_f1": 0.9395705042517813, |
| "eval_leadership_accuracy": 0.8412549510110485, |
| "eval_leadership_f1": 0.8395230039371011, |
| "eval_loss": 0.26496145129203796, |
| "eval_overall_accuracy": 0.938372941421722, |
| "eval_overall_f1": 0.9362734744251768, |
| "eval_race_accuracy": 0.9541380029184907, |
| "eval_race_f1": 0.9519598766506976, |
| "eval_religion_accuracy": 0.9579945799457995, |
| "eval_religion_f1": 0.9568944771373694, |
| "eval_runtime": 10.8844, |
| "eval_safety_accuracy": 0.9170314780070877, |
| "eval_safety_f1": 0.915587903767528, |
| "eval_samples_per_second": 881.445, |
| "eval_steps_per_second": 55.125, |
| "step": 7200 |
| }, |
| { |
| "epoch": 6.083368070029179, |
| "grad_norm": 0.7286815643310547, |
| "learning_rate": 4.452740312566902e-05, |
| "loss": 0.0366, |
| "step": 7300 |
| }, |
| { |
| "epoch": 6.166736140058358, |
| "grad_norm": 2.7514851093292236, |
| "learning_rate": 4.4152751016912864e-05, |
| "loss": 0.0392, |
| "step": 7400 |
| }, |
| { |
| "epoch": 6.250104210087536, |
| "grad_norm": 1.0808284282684326, |
| "learning_rate": 4.377809890815671e-05, |
| "loss": 0.0365, |
| "step": 7500 |
| }, |
| { |
| "epoch": 6.333472280116715, |
| "grad_norm": 0.5677124261856079, |
| "learning_rate": 4.3403446799400555e-05, |
| "loss": 0.0439, |
| "step": 7600 |
| }, |
| { |
| "epoch": 6.416840350145894, |
| "grad_norm": 0.7869217991828918, |
| "learning_rate": 4.30287946906444e-05, |
| "loss": 0.034, |
| "step": 7700 |
| }, |
| { |
| "epoch": 6.500208420175073, |
| "grad_norm": 0.6197104454040527, |
| "learning_rate": 4.265414258188825e-05, |
| "loss": 0.0375, |
| "step": 7800 |
| }, |
| { |
| "epoch": 6.583576490204251, |
| "grad_norm": 1.126968264579773, |
| "learning_rate": 4.228323699421965e-05, |
| "loss": 0.0437, |
| "step": 7900 |
| }, |
| { |
| "epoch": 6.66694456023343, |
| "grad_norm": 0.6215279698371887, |
| "learning_rate": 4.190858488546349e-05, |
| "loss": 0.0413, |
| "step": 8000 |
| }, |
| { |
| "epoch": 6.750312630262609, |
| "grad_norm": 1.0289812088012695, |
| "learning_rate": 4.153393277670734e-05, |
| "loss": 0.0383, |
| "step": 8100 |
| }, |
| { |
| "epoch": 6.833680700291788, |
| "grad_norm": 1.2618753910064697, |
| "learning_rate": 4.1159280667951184e-05, |
| "loss": 0.037, |
| "step": 8200 |
| }, |
| { |
| "epoch": 6.917048770320967, |
| "grad_norm": 0.48271915316581726, |
| "learning_rate": 4.078462855919502e-05, |
| "loss": 0.0403, |
| "step": 8300 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 1.4230297803878784, |
| "learning_rate": 4.040997645043887e-05, |
| "loss": 0.0421, |
| "step": 8400 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_administration_accuracy": 0.9262038774233896, |
| "eval_administration_f1": 0.9271433160043757, |
| "eval_corruption_accuracy": 0.9570564936418595, |
| "eval_corruption_f1": 0.9567097147624816, |
| "eval_democracy_accuracy": 0.9531999166145507, |
| "eval_democracy_f1": 0.9521445432226068, |
| "eval_development_accuracy": 0.9057744423598082, |
| "eval_development_f1": 0.9056708555901262, |
| "eval_economy_accuracy": 0.9282885136543673, |
| "eval_economy_f1": 0.926345436006994, |
| "eval_education_accuracy": 0.962059620596206, |
| "eval_education_f1": 0.961835890625152, |
| "eval_environment_accuracy": 0.9805086512403586, |
| "eval_environment_f1": 0.9790877092918502, |
| "eval_instability_accuracy": 0.9382947675630603, |
| "eval_instability_f1": 0.9376909954205174, |
| "eval_leadership_accuracy": 0.8362518240567021, |
| "eval_leadership_f1": 0.8378707787677608, |
| "eval_loss": 0.2829027473926544, |
| "eval_overall_accuracy": 0.9348638037662428, |
| "eval_overall_f1": 0.9341947646002294, |
| "eval_race_accuracy": 0.9518449030644153, |
| "eval_race_f1": 0.9506769394120873, |
| "eval_religion_accuracy": 0.9585157390035439, |
| "eval_religion_f1": 0.9579338409216822, |
| "eval_runtime": 10.9473, |
| "eval_safety_accuracy": 0.9203668959766521, |
| "eval_safety_f1": 0.917227155177119, |
| "eval_samples_per_second": 876.38, |
| "eval_steps_per_second": 54.808, |
| "step": 8400 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 19184, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 16, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 1 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.070187082265395e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 2, |
| "learning_rate": 7e-05, |
| "num_train_epochs": 16 |
| } |
| } |
|
|