| { |
| "best_global_step": 7197, |
| "best_metric": 0.9341971854179941, |
| "best_model_checkpoint": "./results/run-3/checkpoint-7197", |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 11995, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.041684035014589414, |
| "grad_norm": 1.7545671463012695, |
| "learning_rate": 1.386e-05, |
| "loss": 1.2954, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08336807002917883, |
| "grad_norm": 0.7225435376167297, |
| "learning_rate": 2.7859999999999998e-05, |
| "loss": 0.5134, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12505210504376824, |
| "grad_norm": 0.9463444948196411, |
| "learning_rate": 4.1859999999999996e-05, |
| "loss": 0.4399, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 0.8181287050247192, |
| "learning_rate": 5.586e-05, |
| "loss": 0.3608, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.20842017507294705, |
| "grad_norm": 1.4074561595916748, |
| "learning_rate": 6.986e-05, |
| "loss": 0.3303, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.25010421008753647, |
| "grad_norm": 1.232994556427002, |
| "learning_rate": 6.98170731707317e-05, |
| "loss": 0.3118, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29178824510212586, |
| "grad_norm": 1.1151797771453857, |
| "learning_rate": 6.96341463414634e-05, |
| "loss": 0.3067, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 1.3453563451766968, |
| "learning_rate": 6.944937176644493e-05, |
| "loss": 0.2959, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3751563151313047, |
| "grad_norm": 1.3014110326766968, |
| "learning_rate": 6.926459719142646e-05, |
| "loss": 0.2803, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4168403501458941, |
| "grad_norm": 2.4783546924591064, |
| "learning_rate": 6.907982261640798e-05, |
| "loss": 0.275, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.45852438516048355, |
| "grad_norm": 1.0991694927215576, |
| "learning_rate": 6.88950480413895e-05, |
| "loss": 0.2725, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 1.8534690141677856, |
| "learning_rate": 6.871027346637102e-05, |
| "loss": 0.2965, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5418924551896623, |
| "grad_norm": 1.533751130104065, |
| "learning_rate": 6.852549889135254e-05, |
| "loss": 0.257, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5835764902042517, |
| "grad_norm": 1.39555025100708, |
| "learning_rate": 6.834072431633407e-05, |
| "loss": 0.2666, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6252605252188412, |
| "grad_norm": 1.1627668142318726, |
| "learning_rate": 6.81559497413156e-05, |
| "loss": 0.2461, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 1.319166660308838, |
| "learning_rate": 6.79711751662971e-05, |
| "loss": 0.242, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.70862859524802, |
| "grad_norm": 1.3535823822021484, |
| "learning_rate": 6.778640059127863e-05, |
| "loss": 0.2463, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7503126302626094, |
| "grad_norm": 1.8609535694122314, |
| "learning_rate": 6.760162601626016e-05, |
| "loss": 0.2367, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7919966652771988, |
| "grad_norm": 1.86128830909729, |
| "learning_rate": 6.741685144124168e-05, |
| "loss": 0.2449, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 1.8068938255310059, |
| "learning_rate": 6.723207686622321e-05, |
| "loss": 0.2329, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8753647353063777, |
| "grad_norm": 1.4341034889221191, |
| "learning_rate": 6.704730229120472e-05, |
| "loss": 0.2342, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9170487703209671, |
| "grad_norm": 0.7307045459747314, |
| "learning_rate": 6.686252771618624e-05, |
| "loss": 0.2358, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9587328053355565, |
| "grad_norm": 2.0308947563171387, |
| "learning_rate": 6.667775314116777e-05, |
| "loss": 0.2244, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9317281634354805, |
| "eval_administration_f1": 0.9110301299328539, |
| "eval_corruption_accuracy": 0.9541380029184907, |
| "eval_corruption_f1": 0.9489574147600867, |
| "eval_democracy_accuracy": 0.9473629351678132, |
| "eval_democracy_f1": 0.935646230598331, |
| "eval_development_accuracy": 0.910464873879508, |
| "eval_development_f1": 0.9008646598644421, |
| "eval_economy_accuracy": 0.9235980821346675, |
| "eval_economy_f1": 0.9156600494130359, |
| "eval_education_accuracy": 0.9622680842193038, |
| "eval_education_f1": 0.9586233907920864, |
| "eval_environment_accuracy": 0.978319783197832, |
| "eval_environment_f1": 0.9770754665605412, |
| "eval_instability_accuracy": 0.9398582447362935, |
| "eval_instability_f1": 0.9299109706262821, |
| "eval_leadership_accuracy": 0.8081092349385033, |
| "eval_leadership_f1": 0.7818188134942813, |
| "eval_loss": 0.2264394313097, |
| "eval_overall_accuracy": 0.9310245987075256, |
| "eval_overall_f1": 0.9219705720705208, |
| "eval_race_accuracy": 0.9487179487179487, |
| "eval_race_f1": 0.9448562040025291, |
| "eval_religion_accuracy": 0.9527829893683553, |
| "eval_religion_f1": 0.950851997767183, |
| "eval_runtime": 11.1066, |
| "eval_safety_accuracy": 0.9149468417761101, |
| "eval_safety_f1": 0.9083515370345979, |
| "eval_samples_per_second": 863.809, |
| "eval_steps_per_second": 54.022, |
| "step": 2399 |
| }, |
| { |
| "epoch": 1.0004168403501459, |
| "grad_norm": 1.3034080266952515, |
| "learning_rate": 6.64929785661493e-05, |
| "loss": 0.2417, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.0421008753647354, |
| "grad_norm": 1.3565603494644165, |
| "learning_rate": 6.630820399113082e-05, |
| "loss": 0.1946, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.0837849103793247, |
| "grad_norm": 2.098259449005127, |
| "learning_rate": 6.612342941611233e-05, |
| "loss": 0.1911, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1254689453939142, |
| "grad_norm": 1.0764553546905518, |
| "learning_rate": 6.593865484109386e-05, |
| "loss": 0.1821, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.1671529804085035, |
| "grad_norm": 1.4594541788101196, |
| "learning_rate": 6.575388026607538e-05, |
| "loss": 0.1851, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.208837015423093, |
| "grad_norm": 1.8065540790557861, |
| "learning_rate": 6.556910569105691e-05, |
| "loss": 0.1929, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.2505210504376825, |
| "grad_norm": 0.8411226272583008, |
| "learning_rate": 6.538433111603843e-05, |
| "loss": 0.1882, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.2922050854522718, |
| "grad_norm": 1.886057734489441, |
| "learning_rate": 6.519955654101994e-05, |
| "loss": 0.1861, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.3338891204668613, |
| "grad_norm": 1.2633222341537476, |
| "learning_rate": 6.501478196600147e-05, |
| "loss": 0.1816, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.3755731554814505, |
| "grad_norm": 2.1033339500427246, |
| "learning_rate": 6.4830007390983e-05, |
| "loss": 0.1887, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.41725719049604, |
| "grad_norm": 1.5505166053771973, |
| "learning_rate": 6.464523281596452e-05, |
| "loss": 0.1946, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.4589412255106295, |
| "grad_norm": 1.7376617193222046, |
| "learning_rate": 6.446045824094603e-05, |
| "loss": 0.1881, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.5006252605252188, |
| "grad_norm": 1.8713116645812988, |
| "learning_rate": 6.427568366592756e-05, |
| "loss": 0.1742, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.5423092955398081, |
| "grad_norm": 1.2346725463867188, |
| "learning_rate": 6.409090909090908e-05, |
| "loss": 0.1733, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.5839933305543976, |
| "grad_norm": 1.5446250438690186, |
| "learning_rate": 6.390613451589061e-05, |
| "loss": 0.1885, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.6256773655689871, |
| "grad_norm": 2.538613796234131, |
| "learning_rate": 6.372135994087213e-05, |
| "loss": 0.1737, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.6673614005835766, |
| "grad_norm": 1.1538047790527344, |
| "learning_rate": 6.353658536585365e-05, |
| "loss": 0.1836, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.709045435598166, |
| "grad_norm": 2.285994291305542, |
| "learning_rate": 6.335181079083517e-05, |
| "loss": 0.1934, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.7507294706127552, |
| "grad_norm": 1.3632721900939941, |
| "learning_rate": 6.31670362158167e-05, |
| "loss": 0.1792, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.7924135056273447, |
| "grad_norm": 1.3432073593139648, |
| "learning_rate": 6.298226164079822e-05, |
| "loss": 0.1665, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.8340975406419342, |
| "grad_norm": 4.086724758148193, |
| "learning_rate": 6.279748706577975e-05, |
| "loss": 0.1761, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.8757815756565237, |
| "grad_norm": 1.1902227401733398, |
| "learning_rate": 6.261271249076126e-05, |
| "loss": 0.1738, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.917465610671113, |
| "grad_norm": 2.1310391426086426, |
| "learning_rate": 6.242793791574278e-05, |
| "loss": 0.1822, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.9591496456857023, |
| "grad_norm": 1.384017825126648, |
| "learning_rate": 6.224316334072431e-05, |
| "loss": 0.1827, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9382947675630603, |
| "eval_administration_f1": 0.93083241604641, |
| "eval_corruption_accuracy": 0.9589326662497394, |
| "eval_corruption_f1": 0.9564057119817679, |
| "eval_democracy_accuracy": 0.9522618303106108, |
| "eval_democracy_f1": 0.9459837872310805, |
| "eval_development_accuracy": 0.9119241192411924, |
| "eval_development_f1": 0.9077803592136469, |
| "eval_economy_accuracy": 0.9272461955388784, |
| "eval_economy_f1": 0.9256537091607892, |
| "eval_education_accuracy": 0.9613299979153638, |
| "eval_education_f1": 0.9608415429902122, |
| "eval_environment_accuracy": 0.978319783197832, |
| "eval_environment_f1": 0.9774790124713261, |
| "eval_instability_accuracy": 0.9247446320617052, |
| "eval_instability_f1": 0.9296973340071476, |
| "eval_leadership_accuracy": 0.8296852199291224, |
| "eval_leadership_f1": 0.8138352706726838, |
| "eval_loss": 0.21539363265037537, |
| "eval_overall_accuracy": 0.9336477659648392, |
| "eval_overall_f1": 0.9304371117477391, |
| "eval_race_accuracy": 0.9497602668334376, |
| "eval_race_f1": 0.9482550954258615, |
| "eval_religion_accuracy": 0.9559099437148217, |
| "eval_religion_f1": 0.9545949788562016, |
| "eval_runtime": 11.0609, |
| "eval_safety_accuracy": 0.9153637690223057, |
| "eval_safety_f1": 0.9138861229157411, |
| "eval_samples_per_second": 867.383, |
| "eval_steps_per_second": 54.245, |
| "step": 4798 |
| }, |
| { |
| "epoch": 2.0008336807002918, |
| "grad_norm": 0.8514364957809448, |
| "learning_rate": 6.205838876570583e-05, |
| "loss": 0.1822, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.0425177157148813, |
| "grad_norm": 1.2515586614608765, |
| "learning_rate": 6.187361419068736e-05, |
| "loss": 0.1312, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.084201750729471, |
| "grad_norm": 1.2924548387527466, |
| "learning_rate": 6.168883961566887e-05, |
| "loss": 0.1307, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.12588578574406, |
| "grad_norm": 1.7066774368286133, |
| "learning_rate": 6.15040650406504e-05, |
| "loss": 0.1248, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.1675698207586493, |
| "grad_norm": 1.4067167043685913, |
| "learning_rate": 6.131929046563192e-05, |
| "loss": 0.1318, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.209253855773239, |
| "grad_norm": 2.4648754596710205, |
| "learning_rate": 6.113451589061345e-05, |
| "loss": 0.1293, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.2509378907878284, |
| "grad_norm": 2.232621669769287, |
| "learning_rate": 6.0949741315594965e-05, |
| "loss": 0.128, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.292621925802418, |
| "grad_norm": 2.5116114616394043, |
| "learning_rate": 6.076496674057649e-05, |
| "loss": 0.1328, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.334305960817007, |
| "grad_norm": 1.2505563497543335, |
| "learning_rate": 6.0580192165558015e-05, |
| "loss": 0.1284, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.3759899958315964, |
| "grad_norm": 1.3521802425384521, |
| "learning_rate": 6.039541759053954e-05, |
| "loss": 0.1345, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.417674030846186, |
| "grad_norm": 1.0144108533859253, |
| "learning_rate": 6.021064301552106e-05, |
| "loss": 0.1366, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.4593580658607754, |
| "grad_norm": 0.8403565287590027, |
| "learning_rate": 6.002586844050258e-05, |
| "loss": 0.1397, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.501042100875365, |
| "grad_norm": 2.634355068206787, |
| "learning_rate": 5.98410938654841e-05, |
| "loss": 0.1373, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.542726135889954, |
| "grad_norm": 3.5822253227233887, |
| "learning_rate": 5.965631929046563e-05, |
| "loss": 0.1291, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.5844101709045435, |
| "grad_norm": 1.4991543292999268, |
| "learning_rate": 5.947154471544715e-05, |
| "loss": 0.1475, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.626094205919133, |
| "grad_norm": 1.9590423107147217, |
| "learning_rate": 5.928677014042867e-05, |
| "loss": 0.1411, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.6677782409337225, |
| "grad_norm": 1.2120991945266724, |
| "learning_rate": 5.910199556541019e-05, |
| "loss": 0.1397, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.7094622759483116, |
| "grad_norm": 2.270709991455078, |
| "learning_rate": 5.8917220990391715e-05, |
| "loss": 0.1398, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.751146310962901, |
| "grad_norm": 2.1711227893829346, |
| "learning_rate": 5.873244641537324e-05, |
| "loss": 0.1344, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.7928303459774906, |
| "grad_norm": 3.177982807159424, |
| "learning_rate": 5.854951958610495e-05, |
| "loss": 0.1377, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.83451438099208, |
| "grad_norm": 1.8457061052322388, |
| "learning_rate": 5.8364745011086465e-05, |
| "loss": 0.1277, |
| "step": 6800 |
| }, |
| { |
| "epoch": 2.8761984160066696, |
| "grad_norm": 1.9990291595458984, |
| "learning_rate": 5.817997043606799e-05, |
| "loss": 0.1311, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.917882451021259, |
| "grad_norm": 1.8258967399597168, |
| "learning_rate": 5.7995195861049516e-05, |
| "loss": 0.1359, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.959566486035848, |
| "grad_norm": 0.9961782097816467, |
| "learning_rate": 5.781042128603104e-05, |
| "loss": 0.1416, |
| "step": 7100 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_administration_accuracy": 0.9374609130706691, |
| "eval_administration_f1": 0.9317222442324985, |
| "eval_corruption_accuracy": 0.9572649572649573, |
| "eval_corruption_f1": 0.9537446744289325, |
| "eval_democracy_accuracy": 0.9529914529914529, |
| "eval_democracy_f1": 0.9494686606905594, |
| "eval_development_accuracy": 0.9147383781530123, |
| "eval_development_f1": 0.9100235798889205, |
| "eval_economy_accuracy": 0.9329789451740671, |
| "eval_economy_f1": 0.9281892557992357, |
| "eval_education_accuracy": 0.964665415884928, |
| "eval_education_f1": 0.9620886841561839, |
| "eval_environment_accuracy": 0.980091723994163, |
| "eval_environment_f1": 0.9785258941308824, |
| "eval_instability_accuracy": 0.943089430894309, |
| "eval_instability_f1": 0.9426570245612531, |
| "eval_leadership_accuracy": 0.8357306649989577, |
| "eval_leadership_f1": 0.8319611907018966, |
| "eval_loss": 0.21395592391490936, |
| "eval_overall_accuracy": 0.9376433187408796, |
| "eval_overall_f1": 0.9341971854179941, |
| "eval_race_accuracy": 0.9546591619762351, |
| "eval_race_f1": 0.9516178104963666, |
| "eval_religion_accuracy": 0.9573691890765061, |
| "eval_religion_f1": 0.9546439142787715, |
| "eval_runtime": 10.9576, |
| "eval_safety_accuracy": 0.9206795914112987, |
| "eval_safety_f1": 0.9157232916504305, |
| "eval_samples_per_second": 875.555, |
| "eval_steps_per_second": 54.756, |
| "step": 7197 |
| }, |
| { |
| "epoch": 3.0012505210504377, |
| "grad_norm": 1.1601636409759521, |
| "learning_rate": 5.762564671101256e-05, |
| "loss": 0.1347, |
| "step": 7200 |
| }, |
| { |
| "epoch": 3.042934556065027, |
| "grad_norm": 2.3385512828826904, |
| "learning_rate": 5.744087213599408e-05, |
| "loss": 0.1002, |
| "step": 7300 |
| }, |
| { |
| "epoch": 3.0846185910796167, |
| "grad_norm": 0.9694398641586304, |
| "learning_rate": 5.7256097560975603e-05, |
| "loss": 0.0957, |
| "step": 7400 |
| }, |
| { |
| "epoch": 3.1263026260942057, |
| "grad_norm": 1.703196406364441, |
| "learning_rate": 5.707132298595713e-05, |
| "loss": 0.1002, |
| "step": 7500 |
| }, |
| { |
| "epoch": 3.1679866611087952, |
| "grad_norm": 2.1096134185791016, |
| "learning_rate": 5.6886548410938654e-05, |
| "loss": 0.1061, |
| "step": 7600 |
| }, |
| { |
| "epoch": 3.2096706961233847, |
| "grad_norm": 1.8544224500656128, |
| "learning_rate": 5.6701773835920166e-05, |
| "loss": 0.0964, |
| "step": 7700 |
| }, |
| { |
| "epoch": 3.2513547311379742, |
| "grad_norm": 1.9971802234649658, |
| "learning_rate": 5.651699926090169e-05, |
| "loss": 0.1036, |
| "step": 7800 |
| }, |
| { |
| "epoch": 3.2930387661525637, |
| "grad_norm": 1.9724899530410767, |
| "learning_rate": 5.6332224685883216e-05, |
| "loss": 0.0992, |
| "step": 7900 |
| }, |
| { |
| "epoch": 3.334722801167153, |
| "grad_norm": 3.2631657123565674, |
| "learning_rate": 5.614745011086474e-05, |
| "loss": 0.1022, |
| "step": 8000 |
| }, |
| { |
| "epoch": 3.3764068361817423, |
| "grad_norm": 1.6194812059402466, |
| "learning_rate": 5.596267553584627e-05, |
| "loss": 0.1005, |
| "step": 8100 |
| }, |
| { |
| "epoch": 3.418090871196332, |
| "grad_norm": 1.889183759689331, |
| "learning_rate": 5.577790096082778e-05, |
| "loss": 0.0981, |
| "step": 8200 |
| }, |
| { |
| "epoch": 3.4597749062109213, |
| "grad_norm": 1.4375004768371582, |
| "learning_rate": 5.5593126385809304e-05, |
| "loss": 0.1056, |
| "step": 8300 |
| }, |
| { |
| "epoch": 3.5014589412255104, |
| "grad_norm": 1.5392543077468872, |
| "learning_rate": 5.540835181079083e-05, |
| "loss": 0.1039, |
| "step": 8400 |
| }, |
| { |
| "epoch": 3.5431429762401, |
| "grad_norm": 2.7214481830596924, |
| "learning_rate": 5.5223577235772354e-05, |
| "loss": 0.0974, |
| "step": 8500 |
| }, |
| { |
| "epoch": 3.5848270112546894, |
| "grad_norm": 3.4187774658203125, |
| "learning_rate": 5.503880266075388e-05, |
| "loss": 0.1095, |
| "step": 8600 |
| }, |
| { |
| "epoch": 3.626511046269279, |
| "grad_norm": 2.25942063331604, |
| "learning_rate": 5.485402808573539e-05, |
| "loss": 0.0982, |
| "step": 8700 |
| }, |
| { |
| "epoch": 3.6681950812838684, |
| "grad_norm": 1.933835506439209, |
| "learning_rate": 5.4669253510716916e-05, |
| "loss": 0.1058, |
| "step": 8800 |
| }, |
| { |
| "epoch": 3.709879116298458, |
| "grad_norm": 1.7486668825149536, |
| "learning_rate": 5.448447893569844e-05, |
| "loss": 0.099, |
| "step": 8900 |
| }, |
| { |
| "epoch": 3.751563151313047, |
| "grad_norm": 1.5157849788665771, |
| "learning_rate": 5.429970436067997e-05, |
| "loss": 0.1014, |
| "step": 9000 |
| }, |
| { |
| "epoch": 3.7932471863276365, |
| "grad_norm": 2.798487663269043, |
| "learning_rate": 5.411492978566149e-05, |
| "loss": 0.1122, |
| "step": 9100 |
| }, |
| { |
| "epoch": 3.834931221342226, |
| "grad_norm": 1.6031073331832886, |
| "learning_rate": 5.393015521064302e-05, |
| "loss": 0.1142, |
| "step": 9200 |
| }, |
| { |
| "epoch": 3.8766152563568155, |
| "grad_norm": 1.2697032690048218, |
| "learning_rate": 5.374538063562453e-05, |
| "loss": 0.0987, |
| "step": 9300 |
| }, |
| { |
| "epoch": 3.9182992913714045, |
| "grad_norm": 2.0120253562927246, |
| "learning_rate": 5.3560606060606054e-05, |
| "loss": 0.0994, |
| "step": 9400 |
| }, |
| { |
| "epoch": 3.959983326385994, |
| "grad_norm": 1.9906800985336304, |
| "learning_rate": 5.337583148558758e-05, |
| "loss": 0.1022, |
| "step": 9500 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_administration_accuracy": 0.9391286220554513, |
| "eval_administration_f1": 0.9314051851573208, |
| "eval_corruption_accuracy": 0.9487179487179487, |
| "eval_corruption_f1": 0.9492616863366112, |
| "eval_democracy_accuracy": 0.9540337711069419, |
| "eval_democracy_f1": 0.9486322890870622, |
| "eval_development_accuracy": 0.9184907233687721, |
| "eval_development_f1": 0.9117009476732075, |
| "eval_economy_accuracy": 0.9196372732958099, |
| "eval_economy_f1": 0.9214363800488883, |
| "eval_education_accuracy": 0.9672712111736502, |
| "eval_education_f1": 0.9642091841735867, |
| "eval_environment_accuracy": 0.9809255784865542, |
| "eval_environment_f1": 0.9795730735612355, |
| "eval_instability_accuracy": 0.9282885136543673, |
| "eval_instability_f1": 0.9323212368699592, |
| "eval_leadership_accuracy": 0.8347925786950178, |
| "eval_leadership_f1": 0.8300304516685039, |
| "eval_loss": 0.24011321365833282, |
| "eval_overall_accuracy": 0.9345250503787087, |
| "eval_overall_f1": 0.9324514217297923, |
| "eval_race_accuracy": 0.9515322076297686, |
| "eval_race_f1": 0.9511929125632914, |
| "eval_religion_accuracy": 0.9595580571190327, |
| "eval_religion_f1": 0.9582076782235817, |
| "eval_runtime": 10.9263, |
| "eval_safety_accuracy": 0.9119241192411924, |
| "eval_safety_f1": 0.9114460353942603, |
| "eval_samples_per_second": 878.068, |
| "eval_steps_per_second": 54.914, |
| "step": 9596 |
| }, |
| { |
| "epoch": 4.0016673614005835, |
| "grad_norm": 1.2328238487243652, |
| "learning_rate": 5.3191056910569105e-05, |
| "loss": 0.1003, |
| "step": 9600 |
| }, |
| { |
| "epoch": 4.043351396415173, |
| "grad_norm": 0.6457404494285583, |
| "learning_rate": 5.300628233555063e-05, |
| "loss": 0.0663, |
| "step": 9700 |
| }, |
| { |
| "epoch": 4.0850354314297626, |
| "grad_norm": 2.586268901824951, |
| "learning_rate": 5.282150776053214e-05, |
| "loss": 0.077, |
| "step": 9800 |
| }, |
| { |
| "epoch": 4.126719466444352, |
| "grad_norm": 2.192460060119629, |
| "learning_rate": 5.263673318551367e-05, |
| "loss": 0.0707, |
| "step": 9900 |
| }, |
| { |
| "epoch": 4.168403501458942, |
| "grad_norm": 0.8038670420646667, |
| "learning_rate": 5.245195861049519e-05, |
| "loss": 0.0697, |
| "step": 10000 |
| }, |
| { |
| "epoch": 4.210087536473531, |
| "grad_norm": 1.8463445901870728, |
| "learning_rate": 5.22690317812269e-05, |
| "loss": 0.0842, |
| "step": 10100 |
| }, |
| { |
| "epoch": 4.25177157148812, |
| "grad_norm": 1.395668387413025, |
| "learning_rate": 5.2084257206208424e-05, |
| "loss": 0.0862, |
| "step": 10200 |
| }, |
| { |
| "epoch": 4.293455606502709, |
| "grad_norm": 2.2199463844299316, |
| "learning_rate": 5.189948263118994e-05, |
| "loss": 0.0795, |
| "step": 10300 |
| }, |
| { |
| "epoch": 4.335139641517299, |
| "grad_norm": 1.7197673320770264, |
| "learning_rate": 5.171470805617147e-05, |
| "loss": 0.0784, |
| "step": 10400 |
| }, |
| { |
| "epoch": 4.376823676531888, |
| "grad_norm": 0.9125378131866455, |
| "learning_rate": 5.1529933481152986e-05, |
| "loss": 0.0875, |
| "step": 10500 |
| }, |
| { |
| "epoch": 4.418507711546478, |
| "grad_norm": 1.349757194519043, |
| "learning_rate": 5.134515890613451e-05, |
| "loss": 0.0778, |
| "step": 10600 |
| }, |
| { |
| "epoch": 4.460191746561067, |
| "grad_norm": 1.304445505142212, |
| "learning_rate": 5.1160384331116037e-05, |
| "loss": 0.0863, |
| "step": 10700 |
| }, |
| { |
| "epoch": 4.501875781575657, |
| "grad_norm": 1.316042184829712, |
| "learning_rate": 5.0975609756097555e-05, |
| "loss": 0.0758, |
| "step": 10800 |
| }, |
| { |
| "epoch": 4.543559816590246, |
| "grad_norm": 1.7575550079345703, |
| "learning_rate": 5.079083518107908e-05, |
| "loss": 0.079, |
| "step": 10900 |
| }, |
| { |
| "epoch": 4.585243851604836, |
| "grad_norm": 3.9689066410064697, |
| "learning_rate": 5.06060606060606e-05, |
| "loss": 0.0726, |
| "step": 11000 |
| }, |
| { |
| "epoch": 4.626927886619425, |
| "grad_norm": 3.6912765502929688, |
| "learning_rate": 5.0421286031042124e-05, |
| "loss": 0.0795, |
| "step": 11100 |
| }, |
| { |
| "epoch": 4.668611921634014, |
| "grad_norm": 2.2863271236419678, |
| "learning_rate": 5.023651145602365e-05, |
| "loss": 0.0796, |
| "step": 11200 |
| }, |
| { |
| "epoch": 4.710295956648603, |
| "grad_norm": 0.5653240084648132, |
| "learning_rate": 5.005173688100517e-05, |
| "loss": 0.0798, |
| "step": 11300 |
| }, |
| { |
| "epoch": 4.751979991663193, |
| "grad_norm": 0.5571924448013306, |
| "learning_rate": 4.986696230598669e-05, |
| "loss": 0.0784, |
| "step": 11400 |
| }, |
| { |
| "epoch": 4.793664026677782, |
| "grad_norm": 2.2439966201782227, |
| "learning_rate": 4.968218773096822e-05, |
| "loss": 0.0814, |
| "step": 11500 |
| }, |
| { |
| "epoch": 4.835348061692372, |
| "grad_norm": 3.0010180473327637, |
| "learning_rate": 4.949741315594974e-05, |
| "loss": 0.0744, |
| "step": 11600 |
| }, |
| { |
| "epoch": 4.877032096706961, |
| "grad_norm": 1.1905595064163208, |
| "learning_rate": 4.9312638580931255e-05, |
| "loss": 0.0801, |
| "step": 11700 |
| }, |
| { |
| "epoch": 4.918716131721551, |
| "grad_norm": 1.0284569263458252, |
| "learning_rate": 4.912786400591278e-05, |
| "loss": 0.0801, |
| "step": 11800 |
| }, |
| { |
| "epoch": 4.96040016673614, |
| "grad_norm": 1.0152745246887207, |
| "learning_rate": 4.8943089430894306e-05, |
| "loss": 0.0713, |
| "step": 11900 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_administration_accuracy": 0.9377736085053158, |
| "eval_administration_f1": 0.9276824923430157, |
| "eval_corruption_accuracy": 0.9577861163227017, |
| "eval_corruption_f1": 0.9538795910231914, |
| "eval_democracy_accuracy": 0.9540337711069419, |
| "eval_democracy_f1": 0.9502590354742151, |
| "eval_development_accuracy": 0.9175526370648321, |
| "eval_development_f1": 0.9108745507581222, |
| "eval_economy_accuracy": 0.9300604544506984, |
| "eval_economy_f1": 0.9260225965314521, |
| "eval_education_accuracy": 0.9670627475505524, |
| "eval_education_f1": 0.9645423346801522, |
| "eval_environment_accuracy": 0.9807171148634564, |
| "eval_environment_f1": 0.978908255723529, |
| "eval_instability_accuracy": 0.9444444444444444, |
| "eval_instability_f1": 0.9428908884341405, |
| "eval_leadership_accuracy": 0.8395872420262664, |
| "eval_leadership_f1": 0.8349609938727663, |
| "eval_loss": 0.25315749645233154, |
| "eval_overall_accuracy": 0.9378865263011605, |
| "eval_overall_f1": 0.9340809877025569, |
| "eval_race_accuracy": 0.9501771940796331, |
| "eval_race_f1": 0.949342954475387, |
| "eval_religion_accuracy": 0.9579945799457995, |
| "eval_religion_f1": 0.9551951539975265, |
| "eval_runtime": 10.9532, |
| "eval_safety_accuracy": 0.9174484052532833, |
| "eval_safety_f1": 0.9144130051171864, |
| "eval_samples_per_second": 875.907, |
| "eval_steps_per_second": 54.778, |
| "step": 11995 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 38384, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 16, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 2 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.010440095766938e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 1, |
| "learning_rate": 7e-05, |
| "num_train_epochs": 16 |
| } |
| } |
|
|