{
  "best_global_step": 7197,
  "best_metric": 0.9341971854179941,
  "best_model_checkpoint": "./results/run-3/checkpoint-7197",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 7197,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.041684035014589414,
      "grad_norm": 1.7545671463012695,
      "learning_rate": 1.386e-05,
      "loss": 1.2954,
      "step": 100
    },
    {
      "epoch": 0.08336807002917883,
      "grad_norm": 0.7225435376167297,
      "learning_rate": 2.7859999999999998e-05,
      "loss": 0.5134,
      "step": 200
    },
    {
      "epoch": 0.12505210504376824,
      "grad_norm": 0.9463444948196411,
      "learning_rate": 4.1859999999999996e-05,
      "loss": 0.4399,
      "step": 300
    },
    {
      "epoch": 0.16673614005835766,
      "grad_norm": 0.8181287050247192,
      "learning_rate": 5.586e-05,
      "loss": 0.3608,
      "step": 400
    },
    {
      "epoch": 0.20842017507294705,
      "grad_norm": 1.4074561595916748,
      "learning_rate": 6.986e-05,
      "loss": 0.3303,
      "step": 500
    },
    {
      "epoch": 0.25010421008753647,
      "grad_norm": 1.232994556427002,
      "learning_rate": 6.98170731707317e-05,
      "loss": 0.3118,
      "step": 600
    },
    {
      "epoch": 0.29178824510212586,
      "grad_norm": 1.1151797771453857,
      "learning_rate": 6.96341463414634e-05,
      "loss": 0.3067,
      "step": 700
    },
    {
      "epoch": 0.3334722801167153,
      "grad_norm": 1.3453563451766968,
      "learning_rate": 6.944937176644493e-05,
      "loss": 0.2959,
      "step": 800
    },
    {
      "epoch": 0.3751563151313047,
      "grad_norm": 1.3014110326766968,
      "learning_rate": 6.926459719142646e-05,
      "loss": 0.2803,
      "step": 900
    },
    {
      "epoch": 0.4168403501458941,
      "grad_norm": 2.4783546924591064,
      "learning_rate": 6.907982261640798e-05,
      "loss": 0.275,
      "step": 1000
    },
    {
      "epoch": 0.45852438516048355,
      "grad_norm": 1.0991694927215576,
      "learning_rate": 6.88950480413895e-05,
      "loss": 0.2725,
      "step": 1100
    },
    {
      "epoch": 0.5002084201750729,
      "grad_norm": 1.8534690141677856,
      "learning_rate": 6.871027346637102e-05,
      "loss": 0.2965,
      "step": 1200
    },
    {
      "epoch": 0.5418924551896623,
      "grad_norm": 1.533751130104065,
      "learning_rate": 6.852549889135254e-05,
      "loss": 0.257,
      "step": 1300
    },
    {
      "epoch": 0.5835764902042517,
      "grad_norm": 1.39555025100708,
      "learning_rate": 6.834072431633407e-05,
      "loss": 0.2666,
      "step": 1400
    },
    {
      "epoch": 0.6252605252188412,
      "grad_norm": 1.1627668142318726,
      "learning_rate": 6.81559497413156e-05,
      "loss": 0.2461,
      "step": 1500
    },
    {
      "epoch": 0.6669445602334306,
      "grad_norm": 1.319166660308838,
      "learning_rate": 6.79711751662971e-05,
      "loss": 0.242,
      "step": 1600
    },
    {
      "epoch": 0.70862859524802,
      "grad_norm": 1.3535823822021484,
      "learning_rate": 6.778640059127863e-05,
      "loss": 0.2463,
      "step": 1700
    },
    {
      "epoch": 0.7503126302626094,
      "grad_norm": 1.8609535694122314,
      "learning_rate": 6.760162601626016e-05,
      "loss": 0.2367,
      "step": 1800
    },
    {
      "epoch": 0.7919966652771988,
      "grad_norm": 1.86128830909729,
      "learning_rate": 6.741685144124168e-05,
      "loss": 0.2449,
      "step": 1900
    },
    {
      "epoch": 0.8336807002917882,
      "grad_norm": 1.8068938255310059,
      "learning_rate": 6.723207686622321e-05,
      "loss": 0.2329,
      "step": 2000
    },
    {
      "epoch": 0.8753647353063777,
      "grad_norm": 1.4341034889221191,
      "learning_rate": 6.704730229120472e-05,
      "loss": 0.2342,
      "step": 2100
    },
    {
      "epoch": 0.9170487703209671,
      "grad_norm": 0.7307045459747314,
      "learning_rate": 6.686252771618624e-05,
      "loss": 0.2358,
      "step": 2200
    },
    {
      "epoch": 0.9587328053355565,
      "grad_norm": 2.0308947563171387,
      "learning_rate": 6.667775314116777e-05,
      "loss": 0.2244,
      "step": 2300
    },
    {
      "epoch": 1.0,
      "eval_administration_accuracy": 0.9317281634354805,
      "eval_administration_f1": 0.9110301299328539,
      "eval_corruption_accuracy": 0.9541380029184907,
      "eval_corruption_f1": 0.9489574147600867,
      "eval_democracy_accuracy": 0.9473629351678132,
      "eval_democracy_f1": 0.935646230598331,
      "eval_development_accuracy": 0.910464873879508,
      "eval_development_f1": 0.9008646598644421,
      "eval_economy_accuracy": 0.9235980821346675,
      "eval_economy_f1": 0.9156600494130359,
      "eval_education_accuracy": 0.9622680842193038,
      "eval_education_f1": 0.9586233907920864,
      "eval_environment_accuracy": 0.978319783197832,
      "eval_environment_f1": 0.9770754665605412,
      "eval_instability_accuracy": 0.9398582447362935,
      "eval_instability_f1": 0.9299109706262821,
      "eval_leadership_accuracy": 0.8081092349385033,
      "eval_leadership_f1": 0.7818188134942813,
      "eval_loss": 0.2264394313097,
      "eval_overall_accuracy": 0.9310245987075256,
      "eval_overall_f1": 0.9219705720705208,
      "eval_race_accuracy": 0.9487179487179487,
      "eval_race_f1": 0.9448562040025291,
      "eval_religion_accuracy": 0.9527829893683553,
      "eval_religion_f1": 0.950851997767183,
      "eval_runtime": 11.1066,
      "eval_safety_accuracy": 0.9149468417761101,
      "eval_safety_f1": 0.9083515370345979,
      "eval_samples_per_second": 863.809,
      "eval_steps_per_second": 54.022,
      "step": 2399
    },
    {
      "epoch": 1.0004168403501459,
      "grad_norm": 1.3034080266952515,
      "learning_rate": 6.64929785661493e-05,
      "loss": 0.2417,
      "step": 2400
    },
    {
      "epoch": 1.0421008753647354,
      "grad_norm": 1.3565603494644165,
      "learning_rate": 6.630820399113082e-05,
      "loss": 0.1946,
      "step": 2500
    },
    {
      "epoch": 1.0837849103793247,
      "grad_norm": 2.098259449005127,
      "learning_rate": 6.612342941611233e-05,
      "loss": 0.1911,
      "step": 2600
    },
    {
      "epoch": 1.1254689453939142,
      "grad_norm": 1.0764553546905518,
      "learning_rate": 6.593865484109386e-05,
      "loss": 0.1821,
      "step": 2700
    },
    {
      "epoch": 1.1671529804085035,
      "grad_norm": 1.4594541788101196,
      "learning_rate": 6.575388026607538e-05,
      "loss": 0.1851,
      "step": 2800
    },
    {
      "epoch": 1.208837015423093,
      "grad_norm": 1.8065540790557861,
      "learning_rate": 6.556910569105691e-05,
      "loss": 0.1929,
      "step": 2900
    },
    {
      "epoch": 1.2505210504376825,
      "grad_norm": 0.8411226272583008,
      "learning_rate": 6.538433111603843e-05,
      "loss": 0.1882,
      "step": 3000
    },
    {
      "epoch": 1.2922050854522718,
      "grad_norm": 1.886057734489441,
      "learning_rate": 6.519955654101994e-05,
      "loss": 0.1861,
      "step": 3100
    },
    {
      "epoch": 1.3338891204668613,
      "grad_norm": 1.2633222341537476,
      "learning_rate": 6.501478196600147e-05,
      "loss": 0.1816,
      "step": 3200
    },
    {
      "epoch": 1.3755731554814505,
      "grad_norm": 2.1033339500427246,
      "learning_rate": 6.4830007390983e-05,
      "loss": 0.1887,
      "step": 3300
    },
    {
      "epoch": 1.41725719049604,
      "grad_norm": 1.5505166053771973,
      "learning_rate": 6.464523281596452e-05,
      "loss": 0.1946,
      "step": 3400
    },
    {
      "epoch": 1.4589412255106295,
      "grad_norm": 1.7376617193222046,
      "learning_rate": 6.446045824094603e-05,
      "loss": 0.1881,
      "step": 3500
    },
    {
      "epoch": 1.5006252605252188,
      "grad_norm": 1.8713116645812988,
      "learning_rate": 6.427568366592756e-05,
      "loss": 0.1742,
      "step": 3600
    },
    {
      "epoch": 1.5423092955398081,
      "grad_norm": 1.2346725463867188,
      "learning_rate": 6.409090909090908e-05,
      "loss": 0.1733,
      "step": 3700
    },
    {
      "epoch": 1.5839933305543976,
      "grad_norm": 1.5446250438690186,
      "learning_rate": 6.390613451589061e-05,
      "loss": 0.1885,
      "step": 3800
    },
    {
      "epoch": 1.6256773655689871,
      "grad_norm": 2.538613796234131,
      "learning_rate": 6.372135994087213e-05,
      "loss": 0.1737,
      "step": 3900
    },
    {
      "epoch": 1.6673614005835766,
      "grad_norm": 1.1538047790527344,
      "learning_rate": 6.353658536585365e-05,
      "loss": 0.1836,
      "step": 4000
    },
    {
      "epoch": 1.709045435598166,
      "grad_norm": 2.285994291305542,
      "learning_rate": 6.335181079083517e-05,
      "loss": 0.1934,
      "step": 4100
    },
    {
      "epoch": 1.7507294706127552,
      "grad_norm": 1.3632721900939941,
      "learning_rate": 6.31670362158167e-05,
      "loss": 0.1792,
      "step": 4200
    },
    {
      "epoch": 1.7924135056273447,
      "grad_norm": 1.3432073593139648,
      "learning_rate": 6.298226164079822e-05,
      "loss": 0.1665,
      "step": 4300
    },
    {
      "epoch": 1.8340975406419342,
      "grad_norm": 4.086724758148193,
      "learning_rate": 6.279748706577975e-05,
      "loss": 0.1761,
      "step": 4400
    },
    {
      "epoch": 1.8757815756565237,
      "grad_norm": 1.1902227401733398,
      "learning_rate": 6.261271249076126e-05,
      "loss": 0.1738,
      "step": 4500
    },
    {
      "epoch": 1.917465610671113,
      "grad_norm": 2.1310391426086426,
      "learning_rate": 6.242793791574278e-05,
      "loss": 0.1822,
      "step": 4600
    },
    {
      "epoch": 1.9591496456857023,
      "grad_norm": 1.384017825126648,
      "learning_rate": 6.224316334072431e-05,
      "loss": 0.1827,
      "step": 4700
    },
    {
      "epoch": 2.0,
      "eval_administration_accuracy": 0.9382947675630603,
      "eval_administration_f1": 0.93083241604641,
      "eval_corruption_accuracy": 0.9589326662497394,
      "eval_corruption_f1": 0.9564057119817679,
      "eval_democracy_accuracy": 0.9522618303106108,
      "eval_democracy_f1": 0.9459837872310805,
      "eval_development_accuracy": 0.9119241192411924,
      "eval_development_f1": 0.9077803592136469,
      "eval_economy_accuracy": 0.9272461955388784,
      "eval_economy_f1": 0.9256537091607892,
      "eval_education_accuracy": 0.9613299979153638,
      "eval_education_f1": 0.9608415429902122,
      "eval_environment_accuracy": 0.978319783197832,
      "eval_environment_f1": 0.9774790124713261,
      "eval_instability_accuracy": 0.9247446320617052,
      "eval_instability_f1": 0.9296973340071476,
      "eval_leadership_accuracy": 0.8296852199291224,
      "eval_leadership_f1": 0.8138352706726838,
      "eval_loss": 0.21539363265037537,
      "eval_overall_accuracy": 0.9336477659648392,
      "eval_overall_f1": 0.9304371117477391,
      "eval_race_accuracy": 0.9497602668334376,
      "eval_race_f1": 0.9482550954258615,
      "eval_religion_accuracy": 0.9559099437148217,
      "eval_religion_f1": 0.9545949788562016,
      "eval_runtime": 11.0609,
      "eval_safety_accuracy": 0.9153637690223057,
      "eval_safety_f1": 0.9138861229157411,
      "eval_samples_per_second": 867.383,
      "eval_steps_per_second": 54.245,
      "step": 4798
    },
    {
      "epoch": 2.0008336807002918,
      "grad_norm": 0.8514364957809448,
      "learning_rate": 6.205838876570583e-05,
      "loss": 0.1822,
      "step": 4800
    },
    {
      "epoch": 2.0425177157148813,
      "grad_norm": 1.2515586614608765,
      "learning_rate": 6.187361419068736e-05,
      "loss": 0.1312,
      "step": 4900
    },
    {
      "epoch": 2.084201750729471,
      "grad_norm": 1.2924548387527466,
      "learning_rate": 6.168883961566887e-05,
      "loss": 0.1307,
      "step": 5000
    },
    {
      "epoch": 2.12588578574406,
      "grad_norm": 1.7066774368286133,
      "learning_rate": 6.15040650406504e-05,
      "loss": 0.1248,
      "step": 5100
    },
    {
      "epoch": 2.1675698207586493,
      "grad_norm": 1.4067167043685913,
      "learning_rate": 6.131929046563192e-05,
      "loss": 0.1318,
      "step": 5200
    },
    {
      "epoch": 2.209253855773239,
      "grad_norm": 2.4648754596710205,
      "learning_rate": 6.113451589061345e-05,
      "loss": 0.1293,
      "step": 5300
    },
    {
      "epoch": 2.2509378907878284,
      "grad_norm": 2.232621669769287,
      "learning_rate": 6.0949741315594965e-05,
      "loss": 0.128,
      "step": 5400
    },
    {
      "epoch": 2.292621925802418,
      "grad_norm": 2.5116114616394043,
      "learning_rate": 6.076496674057649e-05,
      "loss": 0.1328,
      "step": 5500
    },
    {
      "epoch": 2.334305960817007,
      "grad_norm": 1.2505563497543335,
      "learning_rate": 6.0580192165558015e-05,
      "loss": 0.1284,
      "step": 5600
    },
    {
      "epoch": 2.3759899958315964,
      "grad_norm": 1.3521802425384521,
      "learning_rate": 6.039541759053954e-05,
      "loss": 0.1345,
      "step": 5700
    },
    {
      "epoch": 2.417674030846186,
      "grad_norm": 1.0144108533859253,
      "learning_rate": 6.021064301552106e-05,
      "loss": 0.1366,
      "step": 5800
    },
    {
      "epoch": 2.4593580658607754,
      "grad_norm": 0.8403565287590027,
      "learning_rate": 6.002586844050258e-05,
      "loss": 0.1397,
      "step": 5900
    },
    {
      "epoch": 2.501042100875365,
      "grad_norm": 2.634355068206787,
      "learning_rate": 5.98410938654841e-05,
      "loss": 0.1373,
      "step": 6000
    },
    {
      "epoch": 2.542726135889954,
      "grad_norm": 3.5822253227233887,
      "learning_rate": 5.965631929046563e-05,
      "loss": 0.1291,
      "step": 6100
    },
    {
      "epoch": 2.5844101709045435,
      "grad_norm": 1.4991543292999268,
      "learning_rate": 5.947154471544715e-05,
      "loss": 0.1475,
      "step": 6200
    },
    {
      "epoch": 2.626094205919133,
      "grad_norm": 1.9590423107147217,
      "learning_rate": 5.928677014042867e-05,
      "loss": 0.1411,
      "step": 6300
    },
    {
      "epoch": 2.6677782409337225,
      "grad_norm": 1.2120991945266724,
      "learning_rate": 5.910199556541019e-05,
      "loss": 0.1397,
      "step": 6400
    },
    {
      "epoch": 2.7094622759483116,
      "grad_norm": 2.270709991455078,
      "learning_rate": 5.8917220990391715e-05,
      "loss": 0.1398,
      "step": 6500
    },
    {
      "epoch": 2.751146310962901,
      "grad_norm": 2.1711227893829346,
      "learning_rate": 5.873244641537324e-05,
      "loss": 0.1344,
      "step": 6600
    },
    {
      "epoch": 2.7928303459774906,
      "grad_norm": 3.177982807159424,
      "learning_rate": 5.854951958610495e-05,
      "loss": 0.1377,
      "step": 6700
    },
    {
      "epoch": 2.83451438099208,
      "grad_norm": 1.8457061052322388,
      "learning_rate": 5.8364745011086465e-05,
      "loss": 0.1277,
      "step": 6800
    },
    {
      "epoch": 2.8761984160066696,
      "grad_norm": 1.9990291595458984,
      "learning_rate": 5.817997043606799e-05,
      "loss": 0.1311,
      "step": 6900
    },
    {
      "epoch": 2.917882451021259,
      "grad_norm": 1.8258967399597168,
      "learning_rate": 5.7995195861049516e-05,
      "loss": 0.1359,
      "step": 7000
    },
    {
      "epoch": 2.959566486035848,
      "grad_norm": 0.9961782097816467,
      "learning_rate": 5.781042128603104e-05,
      "loss": 0.1416,
      "step": 7100
    },
    {
      "epoch": 3.0,
      "eval_administration_accuracy": 0.9374609130706691,
      "eval_administration_f1": 0.9317222442324985,
      "eval_corruption_accuracy": 0.9572649572649573,
      "eval_corruption_f1": 0.9537446744289325,
      "eval_democracy_accuracy": 0.9529914529914529,
      "eval_democracy_f1": 0.9494686606905594,
      "eval_development_accuracy": 0.9147383781530123,
      "eval_development_f1": 0.9100235798889205,
      "eval_economy_accuracy": 0.9329789451740671,
      "eval_economy_f1": 0.9281892557992357,
      "eval_education_accuracy": 0.964665415884928,
      "eval_education_f1": 0.9620886841561839,
      "eval_environment_accuracy": 0.980091723994163,
      "eval_environment_f1": 0.9785258941308824,
      "eval_instability_accuracy": 0.943089430894309,
      "eval_instability_f1": 0.9426570245612531,
      "eval_leadership_accuracy": 0.8357306649989577,
      "eval_leadership_f1": 0.8319611907018966,
      "eval_loss": 0.21395592391490936,
      "eval_overall_accuracy": 0.9376433187408796,
      "eval_overall_f1": 0.9341971854179941,
      "eval_race_accuracy": 0.9546591619762351,
      "eval_race_f1": 0.9516178104963666,
      "eval_religion_accuracy": 0.9573691890765061,
      "eval_religion_f1": 0.9546439142787715,
      "eval_runtime": 10.9576,
      "eval_safety_accuracy": 0.9206795914112987,
      "eval_safety_f1": 0.9157232916504305,
      "eval_samples_per_second": 875.555,
      "eval_steps_per_second": 54.756,
      "step": 7197
    }
  ],
  "logging_steps": 100,
  "max_steps": 38384,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 16,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.989544340573389e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "gradient_accumulation_steps": 1,
    "learning_rate": 7e-05,
    "num_train_epochs": 16
  }
}