| { |
| "best_global_step": 4798, |
| "best_metric": 0.9342598776015526, |
| "best_model_checkpoint": "./results/run-1/checkpoint-4798", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 4798, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.041684035014589414, |
| "grad_norm": 2.362386465072632, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.3944, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08336807002917883, |
| "grad_norm": 0.7483571767807007, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.5576, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12505210504376824, |
| "grad_norm": 0.8563070297241211, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.4502, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 0.7200814485549927, |
| "learning_rate": 3.99e-05, |
| "loss": 0.388, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.20842017507294705, |
| "grad_norm": 1.4553083181381226, |
| "learning_rate": 4.99e-05, |
| "loss": 0.3508, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.25010421008753647, |
| "grad_norm": 0.9915399551391602, |
| "learning_rate": 4.978927203065135e-05, |
| "loss": 0.3186, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29178824510212586, |
| "grad_norm": 0.9978141188621521, |
| "learning_rate": 4.957641549595573e-05, |
| "loss": 0.3087, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 1.3709461688995361, |
| "learning_rate": 4.936355896126011e-05, |
| "loss": 0.2976, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3751563151313047, |
| "grad_norm": 1.82745361328125, |
| "learning_rate": 4.91507024265645e-05, |
| "loss": 0.28, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4168403501458941, |
| "grad_norm": 1.2072925567626953, |
| "learning_rate": 4.893784589186888e-05, |
| "loss": 0.2751, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.45852438516048355, |
| "grad_norm": 1.0433937311172485, |
| "learning_rate": 4.872498935717327e-05, |
| "loss": 0.2712, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 1.1097549200057983, |
| "learning_rate": 4.851213282247765e-05, |
| "loss": 0.2909, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5418924551896623, |
| "grad_norm": 1.2917771339416504, |
| "learning_rate": 4.8299276287782034e-05, |
| "loss": 0.2501, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5835764902042517, |
| "grad_norm": 0.9812286496162415, |
| "learning_rate": 4.808641975308642e-05, |
| "loss": 0.2621, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6252605252188412, |
| "grad_norm": 1.1786612272262573, |
| "learning_rate": 4.7873563218390804e-05, |
| "loss": 0.2435, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 1.4759145975112915, |
| "learning_rate": 4.766070668369519e-05, |
| "loss": 0.2398, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.70862859524802, |
| "grad_norm": 1.5548242330551147, |
| "learning_rate": 4.744785014899958e-05, |
| "loss": 0.2432, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7503126302626094, |
| "grad_norm": 1.9005539417266846, |
| "learning_rate": 4.723499361430396e-05, |
| "loss": 0.2351, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7919966652771988, |
| "grad_norm": 1.5831588506698608, |
| "learning_rate": 4.702213707960835e-05, |
| "loss": 0.2378, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 1.5482088327407837, |
| "learning_rate": 4.680928054491273e-05, |
| "loss": 0.2271, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8753647353063777, |
| "grad_norm": 1.1437913179397583, |
| "learning_rate": 4.6596424010217114e-05, |
| "loss": 0.231, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9170487703209671, |
| "grad_norm": 0.7985406517982483, |
| "learning_rate": 4.63835674755215e-05, |
| "loss": 0.228, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9587328053355565, |
| "grad_norm": 1.9323142766952515, |
| "learning_rate": 4.6170710940825884e-05, |
| "loss": 0.2198, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9322493224932249, |
| "eval_administration_f1": 0.9122911143651031, |
| "eval_corruption_accuracy": 0.9560141755263707, |
| "eval_corruption_f1": 0.9517304794081561, |
| "eval_democracy_accuracy": 0.9505941213258287, |
| "eval_democracy_f1": 0.9401838742047087, |
| "eval_development_accuracy": 0.9118198874296435, |
| "eval_development_f1": 0.9003553411809495, |
| "eval_economy_accuracy": 0.9264123410464874, |
| "eval_economy_f1": 0.9202465382357672, |
| "eval_education_accuracy": 0.9643527204502814, |
| "eval_education_f1": 0.9615509617901187, |
| "eval_environment_accuracy": 0.9799874921826142, |
| "eval_environment_f1": 0.9786142985651582, |
| "eval_instability_accuracy": 0.9417344173441734, |
| "eval_instability_f1": 0.9340921018628459, |
| "eval_leadership_accuracy": 0.8085261621846987, |
| "eval_leadership_f1": 0.7858417402549625, |
| "eval_loss": 0.21971678733825684, |
| "eval_overall_accuracy": 0.9324317281634354, |
| "eval_overall_f1": 0.9244250028140656, |
| "eval_race_accuracy": 0.9508025849489264, |
| "eval_race_f1": 0.9481714649378918, |
| "eval_religion_accuracy": 0.9530956848030019, |
| "eval_religion_f1": 0.9518106778632144, |
| "eval_runtime": 10.9084, |
| "eval_safety_accuracy": 0.9135918282259746, |
| "eval_safety_f1": 0.9082114410999095, |
| "eval_samples_per_second": 879.505, |
| "eval_steps_per_second": 55.003, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.0004168403501459, |
| "grad_norm": 1.2817295789718628, |
| "learning_rate": 4.5957854406130265e-05, |
| "loss": 0.2397, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0421008753647354, |
| "grad_norm": 1.3623204231262207, |
| "learning_rate": 4.5744997871434654e-05, |
| "loss": 0.1885, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0837849103793247, |
| "grad_norm": 1.2534806728363037, |
| "learning_rate": 4.553214133673904e-05, |
| "loss": 0.1843, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1254689453939142, |
| "grad_norm": 1.2860488891601562, |
| "learning_rate": 4.5319284802043424e-05, |
| "loss": 0.1817, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.1671529804085035, |
| "grad_norm": 1.2887414693832397, |
| "learning_rate": 4.510642826734781e-05, |
| "loss": 0.1798, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.208837015423093, |
| "grad_norm": 1.973421335220337, |
| "learning_rate": 4.4893571732652194e-05, |
| "loss": 0.1878, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.2505210504376825, |
| "grad_norm": 0.7910940051078796, |
| "learning_rate": 4.468071519795658e-05, |
| "loss": 0.182, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2922050854522718, |
| "grad_norm": 1.7422680854797363, |
| "learning_rate": 4.4467858663260964e-05, |
| "loss": 0.1833, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.3338891204668613, |
| "grad_norm": 1.1493451595306396, |
| "learning_rate": 4.4255002128565345e-05, |
| "loss": 0.1766, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.3755731554814505, |
| "grad_norm": 2.179731845855713, |
| "learning_rate": 4.4042145593869734e-05, |
| "loss": 0.1785, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.41725719049604, |
| "grad_norm": 1.5773117542266846, |
| "learning_rate": 4.3829289059174115e-05, |
| "loss": 0.1901, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.4589412255106295, |
| "grad_norm": 2.4681854248046875, |
| "learning_rate": 4.3616432524478504e-05, |
| "loss": 0.1833, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.5006252605252188, |
| "grad_norm": 2.391803503036499, |
| "learning_rate": 4.340357598978289e-05, |
| "loss": 0.1677, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.5423092955398081, |
| "grad_norm": 0.8182645440101624, |
| "learning_rate": 4.3190719455087274e-05, |
| "loss": 0.1666, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.5839933305543976, |
| "grad_norm": 1.2136952877044678, |
| "learning_rate": 4.297786292039166e-05, |
| "loss": 0.1787, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.6256773655689871, |
| "grad_norm": 2.435816764831543, |
| "learning_rate": 4.2765006385696044e-05, |
| "loss": 0.1678, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.6673614005835766, |
| "grad_norm": 1.5172932147979736, |
| "learning_rate": 4.2552149851000425e-05, |
| "loss": 0.1779, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.709045435598166, |
| "grad_norm": 1.4677119255065918, |
| "learning_rate": 4.2339293316304814e-05, |
| "loss": 0.1824, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.7507294706127552, |
| "grad_norm": 1.8327163457870483, |
| "learning_rate": 4.2126436781609195e-05, |
| "loss": 0.1719, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.7924135056273447, |
| "grad_norm": 1.2324726581573486, |
| "learning_rate": 4.1913580246913584e-05, |
| "loss": 0.1592, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.8340975406419342, |
| "grad_norm": 3.6276469230651855, |
| "learning_rate": 4.1700723712217965e-05, |
| "loss": 0.1636, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.8757815756565237, |
| "grad_norm": 0.940485417842865, |
| "learning_rate": 4.148786717752235e-05, |
| "loss": 0.1639, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.917465610671113, |
| "grad_norm": 2.002577781677246, |
| "learning_rate": 4.127501064282674e-05, |
| "loss": 0.1756, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.9591496456857023, |
| "grad_norm": 2.6062567234039307, |
| "learning_rate": 4.1062154108131124e-05, |
| "loss": 0.1729, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9374609130706691, |
| "eval_administration_f1": 0.9302102302828306, |
| "eval_corruption_accuracy": 0.9603919116114238, |
| "eval_corruption_f1": 0.9585025895331236, |
| "eval_democracy_accuracy": 0.9562226391494684, |
| "eval_democracy_f1": 0.9505005443286667, |
| "eval_development_accuracy": 0.9199499687304565, |
| "eval_development_f1": 0.9145013995891959, |
| "eval_economy_accuracy": 0.9319366270585783, |
| "eval_economy_f1": 0.9302256375627648, |
| "eval_education_accuracy": 0.9651865749426725, |
| "eval_education_f1": 0.9641689686262792, |
| "eval_environment_accuracy": 0.9806128830519074, |
| "eval_environment_f1": 0.9798389855050841, |
| "eval_instability_accuracy": 0.9244319366270586, |
| "eval_instability_f1": 0.9301250908302109, |
| "eval_leadership_accuracy": 0.8395872420262664, |
| "eval_leadership_f1": 0.830858580138579, |
| "eval_loss": 0.20445191860198975, |
| "eval_overall_accuracy": 0.9366791744840525, |
| "eval_overall_f1": 0.9342598776015526, |
| "eval_race_accuracy": 0.9545549301646863, |
| "eval_race_f1": 0.9531323667533257, |
| "eval_religion_accuracy": 0.9575776526996039, |
| "eval_religion_f1": 0.9560680494194458, |
| "eval_runtime": 10.9745, |
| "eval_safety_accuracy": 0.9122368146758391, |
| "eval_safety_f1": 0.9129860886491253, |
| "eval_samples_per_second": 874.208, |
| "eval_steps_per_second": 54.672, |
| "step": 4798 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 23990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.9790964629766144e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 1, |
| "learning_rate": 5e-05, |
| "num_train_epochs": 10 |
| } |
| } |
|
|