| { |
| "best_global_step": 2399, |
| "best_metric": 0.9244250028140656, |
| "best_model_checkpoint": "./results/run-1/checkpoint-2399", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 2399, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.041684035014589414, |
| "grad_norm": 2.362386465072632, |
| "learning_rate": 9.900000000000002e-06, |
| "loss": 1.3944, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08336807002917883, |
| "grad_norm": 0.7483571767807007, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 0.5576, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12505210504376824, |
| "grad_norm": 0.8563070297241211, |
| "learning_rate": 2.9900000000000002e-05, |
| "loss": 0.4502, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16673614005835766, |
| "grad_norm": 0.7200814485549927, |
| "learning_rate": 3.99e-05, |
| "loss": 0.388, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.20842017507294705, |
| "grad_norm": 1.4553083181381226, |
| "learning_rate": 4.99e-05, |
| "loss": 0.3508, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.25010421008753647, |
| "grad_norm": 0.9915399551391602, |
| "learning_rate": 4.978927203065135e-05, |
| "loss": 0.3186, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.29178824510212586, |
| "grad_norm": 0.9978141188621521, |
| "learning_rate": 4.957641549595573e-05, |
| "loss": 0.3087, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.3334722801167153, |
| "grad_norm": 1.3709461688995361, |
| "learning_rate": 4.936355896126011e-05, |
| "loss": 0.2976, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3751563151313047, |
| "grad_norm": 1.82745361328125, |
| "learning_rate": 4.91507024265645e-05, |
| "loss": 0.28, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4168403501458941, |
| "grad_norm": 1.2072925567626953, |
| "learning_rate": 4.893784589186888e-05, |
| "loss": 0.2751, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.45852438516048355, |
| "grad_norm": 1.0433937311172485, |
| "learning_rate": 4.872498935717327e-05, |
| "loss": 0.2712, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5002084201750729, |
| "grad_norm": 1.1097549200057983, |
| "learning_rate": 4.851213282247765e-05, |
| "loss": 0.2909, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5418924551896623, |
| "grad_norm": 1.2917771339416504, |
| "learning_rate": 4.8299276287782034e-05, |
| "loss": 0.2501, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5835764902042517, |
| "grad_norm": 0.9812286496162415, |
| "learning_rate": 4.808641975308642e-05, |
| "loss": 0.2621, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6252605252188412, |
| "grad_norm": 1.1786612272262573, |
| "learning_rate": 4.7873563218390804e-05, |
| "loss": 0.2435, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6669445602334306, |
| "grad_norm": 1.4759145975112915, |
| "learning_rate": 4.766070668369519e-05, |
| "loss": 0.2398, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.70862859524802, |
| "grad_norm": 1.5548242330551147, |
| "learning_rate": 4.744785014899958e-05, |
| "loss": 0.2432, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7503126302626094, |
| "grad_norm": 1.9005539417266846, |
| "learning_rate": 4.723499361430396e-05, |
| "loss": 0.2351, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7919966652771988, |
| "grad_norm": 1.5831588506698608, |
| "learning_rate": 4.702213707960835e-05, |
| "loss": 0.2378, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8336807002917882, |
| "grad_norm": 1.5482088327407837, |
| "learning_rate": 4.680928054491273e-05, |
| "loss": 0.2271, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8753647353063777, |
| "grad_norm": 1.1437913179397583, |
| "learning_rate": 4.6596424010217114e-05, |
| "loss": 0.231, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9170487703209671, |
| "grad_norm": 0.7985406517982483, |
| "learning_rate": 4.63835674755215e-05, |
| "loss": 0.228, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9587328053355565, |
| "grad_norm": 1.9323142766952515, |
| "learning_rate": 4.6170710940825884e-05, |
| "loss": 0.2198, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_administration_accuracy": 0.9322493224932249, |
| "eval_administration_f1": 0.9122911143651031, |
| "eval_corruption_accuracy": 0.9560141755263707, |
| "eval_corruption_f1": 0.9517304794081561, |
| "eval_democracy_accuracy": 0.9505941213258287, |
| "eval_democracy_f1": 0.9401838742047087, |
| "eval_development_accuracy": 0.9118198874296435, |
| "eval_development_f1": 0.9003553411809495, |
| "eval_economy_accuracy": 0.9264123410464874, |
| "eval_economy_f1": 0.9202465382357672, |
| "eval_education_accuracy": 0.9643527204502814, |
| "eval_education_f1": 0.9615509617901187, |
| "eval_environment_accuracy": 0.9799874921826142, |
| "eval_environment_f1": 0.9786142985651582, |
| "eval_instability_accuracy": 0.9417344173441734, |
| "eval_instability_f1": 0.9340921018628459, |
| "eval_leadership_accuracy": 0.8085261621846987, |
| "eval_leadership_f1": 0.7858417402549625, |
| "eval_loss": 0.21971678733825684, |
| "eval_overall_accuracy": 0.9324317281634354, |
| "eval_overall_f1": 0.9244250028140656, |
| "eval_race_accuracy": 0.9508025849489264, |
| "eval_race_f1": 0.9481714649378918, |
| "eval_religion_accuracy": 0.9530956848030019, |
| "eval_religion_f1": 0.9518106778632144, |
| "eval_runtime": 10.9084, |
| "eval_safety_accuracy": 0.9135918282259746, |
| "eval_safety_f1": 0.9082114410999095, |
| "eval_samples_per_second": 879.505, |
| "eval_steps_per_second": 55.003, |
| "step": 2399 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 23990, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 2, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9686485853798400.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": { |
| "gradient_accumulation_steps": 1, |
| "learning_rate": 5e-05, |
| "num_train_epochs": 10 |
| } |
| } |
|
|