{
  "best_global_step": 1200,
  "best_metric": 0.9199542168750479,
  "best_model_checkpoint": "./results/run-2/checkpoint-1200",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1200,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08336807002917883,
      "grad_norm": 1.2878578901290894,
      "learning_rate": 1.4e-05,
      "loss": 1.2644,
      "step": 100
    },
    {
      "epoch": 0.16673614005835766,
      "grad_norm": 1.0009047985076904,
      "learning_rate": 2.8e-05,
      "loss": 0.4758,
      "step": 200
    },
    {
      "epoch": 0.25010421008753647,
      "grad_norm": 0.7635347843170166,
      "learning_rate": 4.2e-05,
      "loss": 0.4054,
      "step": 300
    },
    {
      "epoch": 0.3334722801167153,
      "grad_norm": 0.8168604373931885,
      "learning_rate": 5.6e-05,
      "loss": 0.3468,
      "step": 400
    },
    {
      "epoch": 0.4168403501458941,
      "grad_norm": 1.1069319248199463,
      "learning_rate": 7e-05,
      "loss": 0.3051,
      "step": 500
    },
    {
      "epoch": 0.5002084201750729,
      "grad_norm": 1.015086054801941,
      "learning_rate": 6.962534789124383e-05,
      "loss": 0.3044,
      "step": 600
    },
    {
      "epoch": 0.5835764902042517,
      "grad_norm": 0.8430781364440918,
      "learning_rate": 6.925069578248769e-05,
      "loss": 0.2742,
      "step": 700
    },
    {
      "epoch": 0.6669445602334306,
      "grad_norm": 0.950409471988678,
      "learning_rate": 6.887604367373152e-05,
      "loss": 0.2573,
      "step": 800
    },
    {
      "epoch": 0.7503126302626094,
      "grad_norm": 1.171885371208191,
      "learning_rate": 6.850139156497538e-05,
      "loss": 0.2517,
      "step": 900
    },
    {
      "epoch": 0.8336807002917882,
      "grad_norm": 0.9133914709091187,
      "learning_rate": 6.812673945621922e-05,
      "loss": 0.2435,
      "step": 1000
    },
    {
      "epoch": 0.9170487703209671,
      "grad_norm": 0.7939682602882385,
      "learning_rate": 6.775208734746307e-05,
      "loss": 0.2373,
      "step": 1100
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.49670639634132385,
      "learning_rate": 6.737743523870691e-05,
      "loss": 0.2363,
      "step": 1200
    },
    {
      "epoch": 1.0,
      "eval_administration_accuracy": 0.9302689180737961,
      "eval_administration_f1": 0.9077189587684091,
      "eval_corruption_accuracy": 0.9534083802376485,
      "eval_corruption_f1": 0.9474431988592255,
      "eval_democracy_accuracy": 0.9473629351678132,
      "eval_democracy_f1": 0.9358042521135516,
      "eval_development_accuracy": 0.9094225557640192,
      "eval_development_f1": 0.8937523747915405,
      "eval_economy_accuracy": 0.924119241192412,
      "eval_economy_f1": 0.9156575066932242,
      "eval_education_accuracy": 0.9621638524077548,
      "eval_education_f1": 0.9583809787976314,
      "eval_environment_accuracy": 0.9775901605169898,
      "eval_environment_f1": 0.9754739686963207,
      "eval_instability_accuracy": 0.9422555764019178,
      "eval_instability_f1": 0.9343996801103537,
      "eval_leadership_accuracy": 0.7970606629143214,
      "eval_leadership_f1": 0.7702375495381067,
      "eval_loss": 0.22745274007320404,
      "eval_overall_accuracy": 0.9296261552359112,
      "eval_overall_f1": 0.9199542168750479,
      "eval_race_accuracy": 0.9505941213258287,
      "eval_race_f1": 0.9483610849847235,
      "eval_religion_accuracy": 0.9505941213258287,
      "eval_religion_f1": 0.9489643730683611,
      "eval_runtime": 11.0331,
      "eval_safety_accuracy": 0.9106733375026058,
      "eval_safety_f1": 0.9032566760791263,
      "eval_samples_per_second": 869.566,
      "eval_steps_per_second": 54.382,
      "step": 1200
    }
  ],
  "logging_steps": 100,
  "max_steps": 19184,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 16,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0100267260379136e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": {
    "gradient_accumulation_steps": 2,
    "learning_rate": 7e-05,
    "num_train_epochs": 16
  }
}
|
|