| { |
| "best_global_step": 5990, |
| "best_metric": 0.9394484894664766, |
| "best_model_checkpoint": "./results/run-0/checkpoint-5990", |
| "epoch": 9.98374322634431, |
| "eval_steps": 500, |
| "global_step": 5990, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 2.8743319511413574, |
| "learning_rate": 6e-06, |
| "loss": 1.4727, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 0.8747783899307251, |
| "learning_rate": 1.2e-05, |
| "loss": 0.617, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 0.7884620428085327, |
| "learning_rate": 1.8e-05, |
| "loss": 0.4493, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 0.8924545645713806, |
| "learning_rate": 2.4e-05, |
| "loss": 0.3876, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 0.8031777739524841, |
| "learning_rate": 3e-05, |
| "loss": 0.3361, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5999459624290466, |
| "learning_rate": 2.9453551912568308e-05, |
| "loss": 0.3004, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9140087554721701, |
| "eval_administration_f1": 0.8740363354524873, |
| "eval_corruption_accuracy": 0.9370439858244737, |
| "eval_corruption_f1": 0.9192595640701061, |
| "eval_democracy_accuracy": 0.9347508859703981, |
| "eval_democracy_f1": 0.9095054688749907, |
| "eval_development_accuracy": 0.8763810715030227, |
| "eval_development_f1": 0.8253993969919758, |
| "eval_economy_accuracy": 0.9010840108401084, |
| "eval_economy_f1": 0.8737219947600353, |
| "eval_education_accuracy": 0.9571607254534084, |
| "eval_education_f1": 0.9485581438064299, |
| "eval_environment_accuracy": 0.9734208880550344, |
| "eval_environment_f1": 0.9693817386774968, |
| "eval_instability_accuracy": 0.9219303731498854, |
| "eval_instability_f1": 0.8878400653107733, |
| "eval_leadership_accuracy": 0.7652699603919116, |
| "eval_leadership_f1": 0.7080044131422123, |
| "eval_loss": 0.2839118242263794, |
| "eval_overall_accuracy": 0.9120109790841496, |
| "eval_overall_f1": 0.8849771384459072, |
| "eval_race_accuracy": 0.9283927454659162, |
| "eval_race_f1": 0.907176591968042, |
| "eval_religion_accuracy": 0.9369397540129247, |
| "eval_religion_f1": 0.9217332965382043, |
| "eval_runtime": 10.9229, |
| "eval_safety_accuracy": 0.8977485928705441, |
| "eval_safety_f1": 0.8751086517581315, |
| "eval_samples_per_second": 878.336, |
| "eval_steps_per_second": 54.93, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1667361400583576, |
| "grad_norm": 0.6991692781448364, |
| "learning_rate": 2.890710382513661e-05, |
| "loss": 0.2667, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3334722801167154, |
| "grad_norm": 0.6808522343635559, |
| "learning_rate": 2.836065573770492e-05, |
| "loss": 0.2573, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.500208420175073, |
| "grad_norm": 0.8924445509910583, |
| "learning_rate": 2.7814207650273226e-05, |
| "loss": 0.2447, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6669445602334307, |
| "grad_norm": 0.9563364386558533, |
| "learning_rate": 2.7267759562841533e-05, |
| "loss": 0.2282, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8336807002917883, |
| "grad_norm": 0.8881794810295105, |
| "learning_rate": 2.6721311475409837e-05, |
| "loss": 0.2217, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.2946265935897827, |
| "learning_rate": 2.6174863387978144e-05, |
| "loss": 0.2197, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9364185949551803, |
| "eval_administration_f1": 0.9264664888238818, |
| "eval_corruption_accuracy": 0.9562226391494684, |
| "eval_corruption_f1": 0.9508457468931831, |
| "eval_democracy_accuracy": 0.947571398790911, |
| "eval_democracy_f1": 0.9355668234958175, |
| "eval_development_accuracy": 0.9049405878674172, |
| "eval_development_f1": 0.8818640965909518, |
| "eval_economy_accuracy": 0.924119241192412, |
| "eval_economy_f1": 0.9144295841480942, |
| "eval_education_accuracy": 0.9669585157390036, |
| "eval_education_f1": 0.9649722301773493, |
| "eval_environment_accuracy": 0.981029810298103, |
| "eval_environment_f1": 0.9786767163906938, |
| "eval_instability_accuracy": 0.9422555764019178, |
| "eval_instability_f1": 0.9320449284299851, |
| "eval_leadership_accuracy": 0.8006045445069835, |
| "eval_leadership_f1": 0.7682063698028097, |
| "eval_loss": 0.21895167231559753, |
| "eval_overall_accuracy": 0.9317629073726635, |
| "eval_overall_f1": 0.9213111296186804, |
| "eval_race_accuracy": 0.9516364394413175, |
| "eval_race_f1": 0.9462871699636376, |
| "eval_religion_accuracy": 0.9586199708150928, |
| "eval_religion_f1": 0.9568464833874664, |
| "eval_runtime": 10.9295, |
| "eval_safety_accuracy": 0.9107775693141547, |
| "eval_safety_f1": 0.8995269173202963, |
| "eval_samples_per_second": 877.809, |
| "eval_steps_per_second": 54.897, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.1667361400583576, |
| "grad_norm": 0.8773576021194458, |
| "learning_rate": 2.5628415300546447e-05, |
| "loss": 0.1841, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.333472280116715, |
| "grad_norm": 0.7541666030883789, |
| "learning_rate": 2.5081967213114754e-05, |
| "loss": 0.1757, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.500208420175073, |
| "grad_norm": 0.9500707983970642, |
| "learning_rate": 2.453551912568306e-05, |
| "loss": 0.1776, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.6669445602334307, |
| "grad_norm": 0.8599845170974731, |
| "learning_rate": 2.398907103825137e-05, |
| "loss": 0.1757, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8336807002917883, |
| "grad_norm": 1.0130952596664429, |
| "learning_rate": 2.3442622950819672e-05, |
| "loss": 0.1675, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 1.4208967685699463, |
| "learning_rate": 2.289617486338798e-05, |
| "loss": 0.1664, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_administration_accuracy": 0.940379403794038, |
| "eval_administration_f1": 0.9332522249114288, |
| "eval_corruption_accuracy": 0.9609130706691682, |
| "eval_corruption_f1": 0.9591571276198037, |
| "eval_democracy_accuracy": 0.9572649572649573, |
| "eval_democracy_f1": 0.951957009924363, |
| "eval_development_accuracy": 0.9184907233687721, |
| "eval_development_f1": 0.909063933465515, |
| "eval_economy_accuracy": 0.9327704815509693, |
| "eval_economy_f1": 0.9291641275521808, |
| "eval_education_accuracy": 0.967479674796748, |
| "eval_education_f1": 0.965512166473417, |
| "eval_environment_accuracy": 0.9835313737752762, |
| "eval_environment_f1": 0.9820983135141137, |
| "eval_instability_accuracy": 0.9481967896602043, |
| "eval_instability_f1": 0.9444250756726008, |
| "eval_leadership_accuracy": 0.8285386700020846, |
| "eval_leadership_f1": 0.8147910628849035, |
| "eval_loss": 0.19577622413635254, |
| "eval_overall_accuracy": 0.9393283996942533, |
| "eval_overall_f1": 0.9346706078324559, |
| "eval_race_accuracy": 0.9551803210339795, |
| "eval_race_f1": 0.9521792365558912, |
| "eval_religion_accuracy": 0.9598707525536794, |
| "eval_religion_f1": 0.9582958411796174, |
| "eval_runtime": 10.979, |
| "eval_safety_accuracy": 0.9193245778611632, |
| "eval_safety_f1": 0.9161511742356354, |
| "eval_samples_per_second": 873.853, |
| "eval_steps_per_second": 54.65, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.1667361400583576, |
| "grad_norm": 0.7676725387573242, |
| "learning_rate": 2.2349726775956283e-05, |
| "loss": 0.137, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.333472280116715, |
| "grad_norm": 0.9001705050468445, |
| "learning_rate": 2.180327868852459e-05, |
| "loss": 0.1366, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.500208420175073, |
| "grad_norm": 0.9584451913833618, |
| "learning_rate": 2.1256830601092897e-05, |
| "loss": 0.1319, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.6669445602334307, |
| "grad_norm": 0.9352701306343079, |
| "learning_rate": 2.0710382513661204e-05, |
| "loss": 0.1324, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.8336807002917883, |
| "grad_norm": 0.8512120842933655, |
| "learning_rate": 2.0163934426229508e-05, |
| "loss": 0.1333, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.8892608880996704, |
| "learning_rate": 1.9617486338797815e-05, |
| "loss": 0.1266, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_administration_accuracy": 0.9418386491557224, |
| "eval_administration_f1": 0.9352052966447959, |
| "eval_corruption_accuracy": 0.9622680842193038, |
| "eval_corruption_f1": 0.9610755105054823, |
| "eval_democracy_accuracy": 0.9578903481342506, |
| "eval_democracy_f1": 0.9547168679700065, |
| "eval_development_accuracy": 0.9218261413383365, |
| "eval_development_f1": 0.9169350889919825, |
| "eval_economy_accuracy": 0.9337085678549093, |
| "eval_economy_f1": 0.9328708122199544, |
| "eval_education_accuracy": 0.9685219929122368, |
| "eval_education_f1": 0.9667154036014268, |
| "eval_environment_accuracy": 0.9815509693558474, |
| "eval_environment_f1": 0.9809518427248292, |
| "eval_instability_accuracy": 0.947571398790911, |
| "eval_instability_f1": 0.9441507785991868, |
| "eval_leadership_accuracy": 0.8358348968105066, |
| "eval_leadership_f1": 0.82702607509575, |
| "eval_loss": 0.1933656632900238, |
| "eval_overall_accuracy": 0.9413782919880481, |
| "eval_overall_f1": 0.9381715218630767, |
| "eval_race_accuracy": 0.9591411298728372, |
| "eval_race_f1": 0.9571038116721658, |
| "eval_religion_accuracy": 0.9621638524077548, |
| "eval_religion_f1": 0.9611025172696737, |
| "eval_runtime": 10.9996, |
| "eval_safety_accuracy": 0.9242234730039608, |
| "eval_safety_f1": 0.920204257061666, |
| "eval_samples_per_second": 872.211, |
| "eval_steps_per_second": 54.547, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.166736140058358, |
| "grad_norm": 0.6617388129234314, |
| "learning_rate": 1.907103825136612e-05, |
| "loss": 0.1039, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.333472280116715, |
| "grad_norm": 0.7575182914733887, |
| "learning_rate": 1.8524590163934426e-05, |
| "loss": 0.1105, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.500208420175073, |
| "grad_norm": 0.69528728723526, |
| "learning_rate": 1.7978142076502733e-05, |
| "loss": 0.1084, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.66694456023343, |
| "grad_norm": 0.9533807039260864, |
| "learning_rate": 1.743169398907104e-05, |
| "loss": 0.1051, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.833680700291788, |
| "grad_norm": 0.7591508030891418, |
| "learning_rate": 1.6885245901639344e-05, |
| "loss": 0.1008, |
| "step": 2900 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 1.3957374095916748, |
| "learning_rate": 1.633879781420765e-05, |
| "loss": 0.1013, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_administration_accuracy": 0.9406920992286846, |
| "eval_administration_f1": 0.935638525668697, |
| "eval_corruption_accuracy": 0.9626850114654992, |
| "eval_corruption_f1": 0.9611422434993832, |
| "eval_democracy_accuracy": 0.9496560350218887, |
| "eval_democracy_f1": 0.9499559979035322, |
| "eval_development_accuracy": 0.9201584323535543, |
| "eval_development_f1": 0.9155534426644083, |
| "eval_economy_accuracy": 0.9354805086512403, |
| "eval_economy_f1": 0.9325398706072688, |
| "eval_education_accuracy": 0.9697727746508235, |
| "eval_education_f1": 0.9681349456850368, |
| "eval_environment_accuracy": 0.9820721284135918, |
| "eval_environment_f1": 0.9811885272501003, |
| "eval_instability_accuracy": 0.9472587033562644, |
| "eval_instability_f1": 0.9442377846524443, |
| "eval_leadership_accuracy": 0.8364602876797999, |
| "eval_leadership_f1": 0.828026933450708, |
| "eval_loss": 0.19940470159053802, |
| "eval_overall_accuracy": 0.9405183795427697, |
| "eval_overall_f1": 0.9377003414537354, |
| "eval_race_accuracy": 0.9554930164686263, |
| "eval_race_f1": 0.9547403715439807, |
| "eval_religion_accuracy": 0.9633104023347926, |
| "eval_religion_f1": 0.9620625852940151, |
| "eval_runtime": 10.9306, |
| "eval_safety_accuracy": 0.923181154888472, |
| "eval_safety_f1": 0.9191828692252488, |
| "eval_samples_per_second": 877.722, |
| "eval_steps_per_second": 54.892, |
| "step": 3000 |
| }, |
| { |
| "epoch": 5.166736140058358, |
| "grad_norm": 0.5871491432189941, |
| "learning_rate": 1.5792349726775955e-05, |
| "loss": 0.0873, |
| "step": 3100 |
| }, |
| { |
| "epoch": 5.333472280116715, |
| "grad_norm": 0.8581504225730896, |
| "learning_rate": 1.5245901639344264e-05, |
| "loss": 0.0862, |
| "step": 3200 |
| }, |
| { |
| "epoch": 5.500208420175073, |
| "grad_norm": 0.9329476952552795, |
| "learning_rate": 1.4699453551912569e-05, |
| "loss": 0.0849, |
| "step": 3300 |
| }, |
| { |
| "epoch": 5.66694456023343, |
| "grad_norm": 0.8767560124397278, |
| "learning_rate": 1.4153005464480874e-05, |
| "loss": 0.088, |
| "step": 3400 |
| }, |
| { |
| "epoch": 5.833680700291788, |
| "grad_norm": 0.8050324320793152, |
| "learning_rate": 1.3606557377049181e-05, |
| "loss": 0.0826, |
| "step": 3500 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.9876068830490112, |
| "learning_rate": 1.3060109289617487e-05, |
| "loss": 0.0811, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_administration_accuracy": 0.9434021263289556, |
| "eval_administration_f1": 0.9387369505688391, |
| "eval_corruption_accuracy": 0.962997706900146, |
| "eval_corruption_f1": 0.9612502739505763, |
| "eval_democracy_accuracy": 0.9562226391494684, |
| "eval_democracy_f1": 0.954550595976141, |
| "eval_development_accuracy": 0.9203668959766521, |
| "eval_development_f1": 0.9144818227568411, |
| "eval_economy_accuracy": 0.9361058995205337, |
| "eval_economy_f1": 0.9313226219336369, |
| "eval_education_accuracy": 0.9694600792161768, |
| "eval_education_f1": 0.9675599527596068, |
| "eval_environment_accuracy": 0.9811340421096518, |
| "eval_environment_f1": 0.9803282887818994, |
| "eval_instability_accuracy": 0.9459036898061288, |
| "eval_instability_f1": 0.942472499866658, |
| "eval_leadership_accuracy": 0.8430268918073797, |
| "eval_leadership_f1": 0.8387770890259914, |
| "eval_loss": 0.20078489184379578, |
| "eval_overall_accuracy": 0.941647557501216, |
| "eval_overall_f1": 0.938780687918615, |
| "eval_race_accuracy": 0.9580988117573483, |
| "eval_race_f1": 0.9562411064132649, |
| "eval_religion_accuracy": 0.961955388784657, |
| "eval_religion_f1": 0.960819752774671, |
| "eval_runtime": 11.0615, |
| "eval_safety_accuracy": 0.9210965186574943, |
| "eval_safety_f1": 0.9188273002152547, |
| "eval_samples_per_second": 867.33, |
| "eval_steps_per_second": 54.242, |
| "step": 3600 |
| }, |
| { |
| "epoch": 6.166736140058358, |
| "grad_norm": 0.8045951724052429, |
| "learning_rate": 1.2513661202185792e-05, |
| "loss": 0.0701, |
| "step": 3700 |
| }, |
| { |
| "epoch": 6.333472280116715, |
| "grad_norm": 0.6562448143959045, |
| "learning_rate": 1.19672131147541e-05, |
| "loss": 0.0726, |
| "step": 3800 |
| }, |
| { |
| "epoch": 6.500208420175073, |
| "grad_norm": 0.7833376526832581, |
| "learning_rate": 1.1420765027322405e-05, |
| "loss": 0.0673, |
| "step": 3900 |
| }, |
| { |
| "epoch": 6.66694456023343, |
| "grad_norm": 0.8071606755256653, |
| "learning_rate": 1.087431693989071e-05, |
| "loss": 0.0733, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.833680700291788, |
| "grad_norm": 0.7821515798568726, |
| "learning_rate": 1.0327868852459017e-05, |
| "loss": 0.0678, |
| "step": 4100 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.6441165804862976, |
| "learning_rate": 9.781420765027323e-06, |
| "loss": 0.0701, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_administration_accuracy": 0.9396497811131957, |
| "eval_administration_f1": 0.9371884848945066, |
| "eval_corruption_accuracy": 0.9616426933500104, |
| "eval_corruption_f1": 0.9603149034436674, |
| "eval_democracy_accuracy": 0.9548676255993329, |
| "eval_democracy_f1": 0.9544642189136688, |
| "eval_development_accuracy": 0.919533041484261, |
| "eval_development_f1": 0.9174833579112073, |
| "eval_economy_accuracy": 0.9370439858244737, |
| "eval_economy_f1": 0.9350264441971636, |
| "eval_education_accuracy": 0.9670627475505524, |
| "eval_education_f1": 0.9657003505198045, |
| "eval_environment_accuracy": 0.9821763602251408, |
| "eval_environment_f1": 0.9808859370812918, |
| "eval_instability_accuracy": 0.9445486762559934, |
| "eval_instability_f1": 0.9432538618242539, |
| "eval_leadership_accuracy": 0.8441734417344173, |
| "eval_leadership_f1": 0.8426503273307849, |
| "eval_loss": 0.21108125150203705, |
| "eval_overall_accuracy": 0.9408397609617123, |
| "eval_overall_f1": 0.9393817348962156, |
| "eval_race_accuracy": 0.9566395663956639, |
| "eval_race_f1": 0.9559208826126899, |
| "eval_religion_accuracy": 0.9609130706691682, |
| "eval_religion_f1": 0.9608002073614549, |
| "eval_runtime": 10.9796, |
| "eval_safety_accuracy": 0.9218261413383365, |
| "eval_safety_f1": 0.9188918426640929, |
| "eval_samples_per_second": 873.8, |
| "eval_steps_per_second": 54.647, |
| "step": 4200 |
| }, |
| { |
| "epoch": 7.166736140058358, |
| "grad_norm": 0.6236246228218079, |
| "learning_rate": 9.234972677595628e-06, |
| "loss": 0.0597, |
| "step": 4300 |
| }, |
| { |
| "epoch": 7.333472280116715, |
| "grad_norm": 0.6468791365623474, |
| "learning_rate": 8.688524590163935e-06, |
| "loss": 0.0568, |
| "step": 4400 |
| }, |
| { |
| "epoch": 7.500208420175073, |
| "grad_norm": 0.7850766777992249, |
| "learning_rate": 8.14207650273224e-06, |
| "loss": 0.0586, |
| "step": 4500 |
| }, |
| { |
| "epoch": 7.66694456023343, |
| "grad_norm": 0.9113643169403076, |
| "learning_rate": 7.595628415300546e-06, |
| "loss": 0.0611, |
| "step": 4600 |
| }, |
| { |
| "epoch": 7.833680700291788, |
| "grad_norm": 0.9405547976493835, |
| "learning_rate": 7.049180327868853e-06, |
| "loss": 0.0636, |
| "step": 4700 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.5414742827415466, |
| "learning_rate": 6.502732240437159e-06, |
| "loss": 0.0588, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_administration_accuracy": 0.9413174900979779, |
| "eval_administration_f1": 0.9368824702703686, |
| "eval_corruption_accuracy": 0.9614342297269126, |
| "eval_corruption_f1": 0.9602768664022149, |
| "eval_democracy_accuracy": 0.9560141755263707, |
| "eval_democracy_f1": 0.9543585350199038, |
| "eval_development_accuracy": 0.9192203460496143, |
| "eval_development_f1": 0.9152605716450639, |
| "eval_economy_accuracy": 0.9381905357515113, |
| "eval_economy_f1": 0.935106524044037, |
| "eval_education_accuracy": 0.9688346883468835, |
| "eval_education_f1": 0.9673054511905975, |
| "eval_environment_accuracy": 0.9822805920366896, |
| "eval_environment_f1": 0.980929327817527, |
| "eval_instability_accuracy": 0.9455909943714822, |
| "eval_instability_f1": 0.9437005892876725, |
| "eval_leadership_accuracy": 0.8438607462997707, |
| "eval_leadership_f1": 0.8410810165791273, |
| "eval_loss": 0.215087428689003, |
| "eval_overall_accuracy": 0.9414651518310054, |
| "eval_overall_f1": 0.93924957060805, |
| "eval_race_accuracy": 0.9572649572649573, |
| "eval_race_f1": 0.9565627258185468, |
| "eval_religion_accuracy": 0.9615384615384616, |
| "eval_religion_f1": 0.9610239728595763, |
| "eval_runtime": 10.9675, |
| "eval_safety_accuracy": 0.9220346049614342, |
| "eval_safety_f1": 0.9185067963619639, |
| "eval_samples_per_second": 874.763, |
| "eval_steps_per_second": 54.707, |
| "step": 4800 |
| }, |
| { |
| "epoch": 8.166736140058358, |
| "grad_norm": 0.5899946093559265, |
| "learning_rate": 5.956284153005465e-06, |
| "loss": 0.0543, |
| "step": 4900 |
| }, |
| { |
| "epoch": 8.333472280116716, |
| "grad_norm": 0.5821051597595215, |
| "learning_rate": 5.409836065573771e-06, |
| "loss": 0.0528, |
| "step": 5000 |
| }, |
| { |
| "epoch": 8.500208420175072, |
| "grad_norm": 0.7321776151657104, |
| "learning_rate": 4.863387978142076e-06, |
| "loss": 0.0532, |
| "step": 5100 |
| }, |
| { |
| "epoch": 8.66694456023343, |
| "grad_norm": 0.7479367256164551, |
| "learning_rate": 4.316939890710383e-06, |
| "loss": 0.0519, |
| "step": 5200 |
| }, |
| { |
| "epoch": 8.833680700291788, |
| "grad_norm": 0.8334552049636841, |
| "learning_rate": 3.770491803278689e-06, |
| "loss": 0.0505, |
| "step": 5300 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.7611594200134277, |
| "learning_rate": 3.2240437158469947e-06, |
| "loss": 0.0528, |
| "step": 5400 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_administration_accuracy": 0.9398582447362935, |
| "eval_administration_f1": 0.9370835475227389, |
| "eval_corruption_accuracy": 0.9618511569731082, |
| "eval_corruption_f1": 0.9605413145422365, |
| "eval_democracy_accuracy": 0.9559099437148217, |
| "eval_democracy_f1": 0.9549404791061389, |
| "eval_development_accuracy": 0.9212007504690432, |
| "eval_development_f1": 0.9170777974760537, |
| "eval_economy_accuracy": 0.9381905357515113, |
| "eval_economy_f1": 0.93592555672295, |
| "eval_education_accuracy": 0.9682092974775902, |
| "eval_education_f1": 0.9667368725950756, |
| "eval_environment_accuracy": 0.9813425057327496, |
| "eval_environment_f1": 0.9802272421649649, |
| "eval_instability_accuracy": 0.9472587033562644, |
| "eval_instability_f1": 0.9448551688210497, |
| "eval_leadership_accuracy": 0.8438607462997707, |
| "eval_leadership_f1": 0.8412354371277554, |
| "eval_loss": 0.21861030161380768, |
| "eval_overall_accuracy": 0.9413609200194567, |
| "eval_overall_f1": 0.9394140341920821, |
| "eval_race_accuracy": 0.9572649572649573, |
| "eval_race_f1": 0.9563061857501846, |
| "eval_religion_accuracy": 0.961955388784657, |
| "eval_religion_f1": 0.9612659459831671, |
| "eval_runtime": 11.1696, |
| "eval_safety_accuracy": 0.9194288096727121, |
| "eval_safety_f1": 0.9167728624926694, |
| "eval_samples_per_second": 858.939, |
| "eval_steps_per_second": 53.717, |
| "step": 5400 |
| }, |
| { |
| "epoch": 9.166736140058358, |
| "grad_norm": 0.5734995007514954, |
| "learning_rate": 2.6775956284153005e-06, |
| "loss": 0.0483, |
| "step": 5500 |
| }, |
| { |
| "epoch": 9.333472280116716, |
| "grad_norm": 0.4083118736743927, |
| "learning_rate": 2.1311475409836063e-06, |
| "loss": 0.0491, |
| "step": 5600 |
| }, |
| { |
| "epoch": 9.500208420175072, |
| "grad_norm": 0.6585514545440674, |
| "learning_rate": 1.5846994535519126e-06, |
| "loss": 0.0488, |
| "step": 5700 |
| }, |
| { |
| "epoch": 9.66694456023343, |
| "grad_norm": 0.8815754055976868, |
| "learning_rate": 1.0382513661202186e-06, |
| "loss": 0.0485, |
| "step": 5800 |
| }, |
| { |
| "epoch": 9.833680700291788, |
| "grad_norm": 0.7910645008087158, |
| "learning_rate": 4.918032786885246e-07, |
| "loss": 0.0475, |
| "step": 5900 |
| }, |
| { |
| "epoch": 9.98374322634431, |
| "eval_administration_accuracy": 0.9398582447362935, |
| "eval_administration_f1": 0.9361942162273019, |
| "eval_corruption_accuracy": 0.9618511569731082, |
| "eval_corruption_f1": 0.9605486831469265, |
| "eval_democracy_accuracy": 0.9554930164686263, |
| "eval_democracy_f1": 0.9543821493687584, |
| "eval_development_accuracy": 0.9214092140921409, |
| "eval_development_f1": 0.9172870275429638, |
| "eval_economy_accuracy": 0.9368355222013759, |
| "eval_economy_f1": 0.9345244446666318, |
| "eval_education_accuracy": 0.9690431519699813, |
| "eval_education_f1": 0.9674369063891264, |
| "eval_environment_accuracy": 0.9807171148634564, |
| "eval_environment_f1": 0.9797111999641163, |
| "eval_instability_accuracy": 0.9462163852407754, |
| "eval_instability_f1": 0.9447235769030393, |
| "eval_leadership_accuracy": 0.8455284552845529, |
| "eval_leadership_f1": 0.8428711309576332, |
| "eval_loss": 0.22077877819538116, |
| "eval_overall_accuracy": 0.9413956639566394, |
| "eval_overall_f1": 0.9394484894664766, |
| "eval_race_accuracy": 0.9575776526996039, |
| "eval_race_f1": 0.9567385833513858, |
| "eval_religion_accuracy": 0.962059620596206, |
| "eval_religion_f1": 0.9614110923085984, |
| "eval_runtime": 11.0829, |
| "eval_safety_accuracy": 0.9201584323535543, |
| "eval_safety_f1": 0.9175528627712379, |
| "eval_samples_per_second": 865.657, |
| "eval_steps_per_second": 54.137, |
| "step": 5990 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 5990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.932543652062822e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 4, |
| "learning_rate": 3e-05, |
| "num_train_epochs": 10 |
| } |
| } |
|
|