| { |
| "best_global_step": 2400, |
| "best_metric": 0.9399009152601657, |
| "best_model_checkpoint": "./results/run-4/checkpoint-2400", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 2400, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 1.7758623361587524, |
| "learning_rate": 1e-05, |
| "loss": 1.3361, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 0.6607375144958496, |
| "learning_rate": 2e-05, |
| "loss": 0.5099, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 0.5690075159072876, |
| "learning_rate": 3e-05, |
| "loss": 0.4223, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 0.848582923412323, |
| "learning_rate": 4e-05, |
| "loss": 0.3466, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 0.7310414910316467, |
| "learning_rate": 5e-05, |
| "loss": 0.3032, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5762994289398193, |
| "learning_rate": 4.7995991983967934e-05, |
| "loss": 0.2757, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9163018553262455, |
| "eval_administration_f1": 0.8799341161157713, |
| "eval_corruption_accuracy": 0.9427767354596623, |
| "eval_corruption_f1": 0.9297320702956003, |
| "eval_democracy_accuracy": 0.9418386491557224, |
| "eval_democracy_f1": 0.9228010908644549, |
| "eval_development_accuracy": 0.8867000208463623, |
| "eval_development_f1": 0.8490864044420574, |
| "eval_economy_accuracy": 0.9122368146758391, |
| "eval_economy_f1": 0.8967581427500766, |
| "eval_education_accuracy": 0.9583072753804461, |
| "eval_education_f1": 0.9515050567699045, |
| "eval_environment_accuracy": 0.9754012924744632, |
| "eval_environment_f1": 0.971384333060466, |
| "eval_instability_accuracy": 0.924119241192412, |
| "eval_instability_f1": 0.8935408759758452, |
| "eval_leadership_accuracy": 0.7805920366895976, |
| "eval_leadership_f1": 0.7320671383079426, |
| "eval_loss": 0.26127132773399353, |
| "eval_overall_accuracy": 0.9182127718713086, |
| "eval_overall_f1": 0.8968188861118868, |
| "eval_race_accuracy": 0.9360016677089847, |
| "eval_race_f1": 0.9209694401775296, |
| "eval_religion_accuracy": 0.9441317490097978, |
| "eval_religion_f1": 0.9328887566718251, |
| "eval_runtime": 10.8983, |
| "eval_safety_accuracy": 0.9001459245361685, |
| "eval_safety_f1": 0.8811592079111662, |
| "eval_samples_per_second": 880.318, |
| "eval_steps_per_second": 55.054, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1667361400583576, |
| "grad_norm": 0.6828027367591858, |
| "learning_rate": 4.599198396793588e-05, |
| "loss": 0.2396, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3334722801167154, |
| "grad_norm": 0.67964106798172, |
| "learning_rate": 4.398797595190381e-05, |
| "loss": 0.2312, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.500208420175073, |
| "grad_norm": 0.7818251252174377, |
| "learning_rate": 4.198396793587174e-05, |
| "loss": 0.2237, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6669445602334307, |
| "grad_norm": 0.8807923197746277, |
| "learning_rate": 3.997995991983968e-05, |
| "loss": 0.2085, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8336807002917883, |
| "grad_norm": 0.8586667776107788, |
| "learning_rate": 3.797595190380762e-05, |
| "loss": 0.2007, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9014652371406555, |
| "learning_rate": 3.5971943887775555e-05, |
| "loss": 0.2, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9375651448822181, |
| "eval_administration_f1": 0.9290316070134363, |
| "eval_corruption_accuracy": 0.9589326662497394, |
| "eval_corruption_f1": 0.9550373529738804, |
| "eval_democracy_accuracy": 0.9521575984990619, |
| "eval_democracy_f1": 0.9433460958983528, |
| "eval_development_accuracy": 0.9138002918490723, |
| "eval_development_f1": 0.8974478772752896, |
| "eval_economy_accuracy": 0.9307900771315405, |
| "eval_economy_f1": 0.9241148503263762, |
| "eval_education_accuracy": 0.9664373566812591, |
| "eval_education_f1": 0.9649731388208568, |
| "eval_environment_accuracy": 0.9822805920366896, |
| "eval_environment_f1": 0.9806789699363689, |
| "eval_instability_accuracy": 0.9471544715447154, |
| "eval_instability_f1": 0.9400721946847931, |
| "eval_leadership_accuracy": 0.8124869710235564, |
| "eval_leadership_f1": 0.7871605097392734, |
| "eval_loss": 0.20422659814357758, |
| "eval_overall_accuracy": 0.9359582377875061, |
| "eval_overall_f1": 0.9285199435647921, |
| "eval_race_accuracy": 0.9536168438607463, |
| "eval_race_f1": 0.950384319474048, |
| "eval_religion_accuracy": 0.9576818845111528, |
| "eval_religion_f1": 0.956169902275164, |
| "eval_runtime": 11.0011, |
| "eval_safety_accuracy": 0.9185949551803211, |
| "eval_safety_f1": 0.913822504359663, |
| "eval_samples_per_second": 872.094, |
| "eval_steps_per_second": 54.54, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.1667361400583576, |
| "grad_norm": 0.7211724519729614, |
| "learning_rate": 3.3967935871743486e-05, |
| "loss": 0.1565, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.333472280116715, |
| "grad_norm": 0.8795008063316345, |
| "learning_rate": 3.1963927855711424e-05, |
| "loss": 0.1491, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.500208420175073, |
| "grad_norm": 0.9686936140060425, |
| "learning_rate": 2.9959919839679363e-05, |
| "loss": 0.1514, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.6669445602334307, |
| "grad_norm": 0.8608136773109436, |
| "learning_rate": 2.7955911823647297e-05, |
| "loss": 0.1495, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8336807002917883, |
| "grad_norm": 0.9045342803001404, |
| "learning_rate": 2.595190380761523e-05, |
| "loss": 0.1433, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.7755804061889648, |
| "learning_rate": 2.3947895791583166e-05, |
| "loss": 0.1425, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_administration_accuracy": 0.9407963310402335, |
| "eval_administration_f1": 0.9350680574385688, |
| "eval_corruption_accuracy": 0.9607046070460704, |
| "eval_corruption_f1": 0.9599629345245179, |
| "eval_democracy_accuracy": 0.9569522618303106, |
| "eval_democracy_f1": 0.9539836532509403, |
| "eval_development_accuracy": 0.9252657911194496, |
| "eval_development_f1": 0.918888388565598, |
| "eval_economy_accuracy": 0.9362101313320825, |
| "eval_economy_f1": 0.9331727962045289, |
| "eval_education_accuracy": 0.9673754429851991, |
| "eval_education_f1": 0.9653887107414594, |
| "eval_environment_accuracy": 0.9832186783406296, |
| "eval_environment_f1": 0.9819944959159485, |
| "eval_instability_accuracy": 0.9492391077756931, |
| "eval_instability_f1": 0.9467721148205744, |
| "eval_leadership_accuracy": 0.8369814467375443, |
| "eval_leadership_f1": 0.8295655888282574, |
| "eval_loss": 0.189470112323761, |
| "eval_overall_accuracy": 0.94157806962685, |
| "eval_overall_f1": 0.9384264348969675, |
| "eval_race_accuracy": 0.957473420888055, |
| "eval_race_f1": 0.9558244178036731, |
| "eval_religion_accuracy": 0.960287679799875, |
| "eval_religion_f1": 0.9595273358731586, |
| "eval_runtime": 11.03, |
| "eval_safety_accuracy": 0.9244319366270586, |
| "eval_safety_f1": 0.9209687247963856, |
| "eval_samples_per_second": 869.81, |
| "eval_steps_per_second": 54.397, |
| "step": 1800 |
| }, |
| { |
| "epoch": 3.1667361400583576, |
| "grad_norm": 0.737460196018219, |
| "learning_rate": 2.1943887775551105e-05, |
| "loss": 0.1122, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.333472280116715, |
| "grad_norm": 0.7762249708175659, |
| "learning_rate": 1.993987975951904e-05, |
| "loss": 0.1122, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.500208420175073, |
| "grad_norm": 1.0788854360580444, |
| "learning_rate": 1.7935871743486977e-05, |
| "loss": 0.1083, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.6669445602334307, |
| "grad_norm": 0.7059413194656372, |
| "learning_rate": 1.593186372745491e-05, |
| "loss": 0.1081, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.8336807002917883, |
| "grad_norm": 0.9918854236602783, |
| "learning_rate": 1.3927855711422847e-05, |
| "loss": 0.1086, |
| "step": 2300 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.9893817901611328, |
| "learning_rate": 1.1923847695390781e-05, |
| "loss": 0.1024, |
| "step": 2400 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_administration_accuracy": 0.9428809672712112, |
| "eval_administration_f1": 0.938180470700318, |
| "eval_corruption_accuracy": 0.9623723160308526, |
| "eval_corruption_f1": 0.9612272557957268, |
| "eval_democracy_accuracy": 0.9570564936418595, |
| "eval_democracy_f1": 0.9540522922098595, |
| "eval_development_accuracy": 0.9252657911194496, |
| "eval_development_f1": 0.9205294368258055, |
| "eval_economy_accuracy": 0.9373566812591203, |
| "eval_economy_f1": 0.9360975905522984, |
| "eval_education_accuracy": 0.9690431519699813, |
| "eval_education_f1": 0.9675757412855236, |
| "eval_environment_accuracy": 0.9822805920366896, |
| "eval_environment_f1": 0.9813159598680739, |
| "eval_instability_accuracy": 0.9490306441525954, |
| "eval_instability_f1": 0.9457765995039474, |
| "eval_leadership_accuracy": 0.8419845736918907, |
| "eval_leadership_f1": 0.8366095676531852, |
| "eval_loss": 0.1909668743610382, |
| "eval_overall_accuracy": 0.9425769578208602, |
| "eval_overall_f1": 0.9399009152601657, |
| "eval_race_accuracy": 0.958411507191995, |
| "eval_race_f1": 0.9572260766271818, |
| "eval_religion_accuracy": 0.9600792161767772, |
| "eval_religion_f1": 0.9597895636198337, |
| "eval_runtime": 11.044, |
| "eval_safety_accuracy": 0.9251615593079008, |
| "eval_safety_f1": 0.9204304284802344, |
| "eval_samples_per_second": 868.704, |
| "eval_steps_per_second": 54.328, |
| "step": 2400 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2995, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.040106904151654e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 4, |
| "learning_rate": 5e-05, |
| "num_train_epochs": 5 |
| } |
| } |
|
|