| { |
| "best_global_step": 1200, |
| "best_metric": 0.9285199435647921, |
| "best_model_checkpoint": "./results/run-4/checkpoint-1200", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1200, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 1.7758623361587524, |
| "learning_rate": 1e-05, |
| "loss": 1.3361, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 0.6607375144958496, |
| "learning_rate": 2e-05, |
| "loss": 0.5099, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 0.5690075159072876, |
| "learning_rate": 3e-05, |
| "loss": 0.4223, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 0.848582923412323, |
| "learning_rate": 4e-05, |
| "loss": 0.3466, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 0.7310414910316467, |
| "learning_rate": 5e-05, |
| "loss": 0.3032, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.5762994289398193, |
| "learning_rate": 4.7995991983967934e-05, |
| "loss": 0.2757, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9163018553262455, |
| "eval_administration_f1": 0.8799341161157713, |
| "eval_corruption_accuracy": 0.9427767354596623, |
| "eval_corruption_f1": 0.9297320702956003, |
| "eval_democracy_accuracy": 0.9418386491557224, |
| "eval_democracy_f1": 0.9228010908644549, |
| "eval_development_accuracy": 0.8867000208463623, |
| "eval_development_f1": 0.8490864044420574, |
| "eval_economy_accuracy": 0.9122368146758391, |
| "eval_economy_f1": 0.8967581427500766, |
| "eval_education_accuracy": 0.9583072753804461, |
| "eval_education_f1": 0.9515050567699045, |
| "eval_environment_accuracy": 0.9754012924744632, |
| "eval_environment_f1": 0.971384333060466, |
| "eval_instability_accuracy": 0.924119241192412, |
| "eval_instability_f1": 0.8935408759758452, |
| "eval_leadership_accuracy": 0.7805920366895976, |
| "eval_leadership_f1": 0.7320671383079426, |
| "eval_loss": 0.26127132773399353, |
| "eval_overall_accuracy": 0.9182127718713086, |
| "eval_overall_f1": 0.8968188861118868, |
| "eval_race_accuracy": 0.9360016677089847, |
| "eval_race_f1": 0.9209694401775296, |
| "eval_religion_accuracy": 0.9441317490097978, |
| "eval_religion_f1": 0.9328887566718251, |
| "eval_runtime": 10.8983, |
| "eval_safety_accuracy": 0.9001459245361685, |
| "eval_safety_f1": 0.8811592079111662, |
| "eval_samples_per_second": 880.318, |
| "eval_steps_per_second": 55.054, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1667361400583576, |
| "grad_norm": 0.6828027367591858, |
| "learning_rate": 4.599198396793588e-05, |
| "loss": 0.2396, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3334722801167154, |
| "grad_norm": 0.67964106798172, |
| "learning_rate": 4.398797595190381e-05, |
| "loss": 0.2312, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.500208420175073, |
| "grad_norm": 0.7818251252174377, |
| "learning_rate": 4.198396793587174e-05, |
| "loss": 0.2237, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6669445602334307, |
| "grad_norm": 0.8807923197746277, |
| "learning_rate": 3.997995991983968e-05, |
| "loss": 0.2085, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.8336807002917883, |
| "grad_norm": 0.8586667776107788, |
| "learning_rate": 3.797595190380762e-05, |
| "loss": 0.2007, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.9014652371406555, |
| "learning_rate": 3.5971943887775555e-05, |
| "loss": 0.2, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_administration_accuracy": 0.9375651448822181, |
| "eval_administration_f1": 0.9290316070134363, |
| "eval_corruption_accuracy": 0.9589326662497394, |
| "eval_corruption_f1": 0.9550373529738804, |
| "eval_democracy_accuracy": 0.9521575984990619, |
| "eval_democracy_f1": 0.9433460958983528, |
| "eval_development_accuracy": 0.9138002918490723, |
| "eval_development_f1": 0.8974478772752896, |
| "eval_economy_accuracy": 0.9307900771315405, |
| "eval_economy_f1": 0.9241148503263762, |
| "eval_education_accuracy": 0.9664373566812591, |
| "eval_education_f1": 0.9649731388208568, |
| "eval_environment_accuracy": 0.9822805920366896, |
| "eval_environment_f1": 0.9806789699363689, |
| "eval_instability_accuracy": 0.9471544715447154, |
| "eval_instability_f1": 0.9400721946847931, |
| "eval_leadership_accuracy": 0.8124869710235564, |
| "eval_leadership_f1": 0.7871605097392734, |
| "eval_loss": 0.20422659814357758, |
| "eval_overall_accuracy": 0.9359582377875061, |
| "eval_overall_f1": 0.9285199435647921, |
| "eval_race_accuracy": 0.9536168438607463, |
| "eval_race_f1": 0.950384319474048, |
| "eval_religion_accuracy": 0.9576818845111528, |
| "eval_religion_f1": 0.956169902275164, |
| "eval_runtime": 11.0011, |
| "eval_safety_accuracy": 0.9185949551803211, |
| "eval_safety_f1": 0.913822504359663, |
| "eval_samples_per_second": 872.094, |
| "eval_steps_per_second": 54.54, |
| "step": 1200 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2995, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.020053452075827e+16, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 4, |
| "learning_rate": 5e-05, |
| "num_train_epochs": 5 |
| } |
| } |
|
|