| { | |
| "best_global_step": 3420, | |
| "best_metric": 0.7986006996501749, | |
| "best_model_checkpoint": "evaluation_results/run_20250604_000220/checkpoint-3420", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 3420, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.7309941520467836, | |
| "grad_norm": 2.736999988555908, | |
| "learning_rate": 8.540935672514621e-06, | |
| "loss": 0.4437, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8245955355314356, | |
| "eval_f1": 0.7761662093296746, | |
| "eval_loss": 0.39194825291633606, | |
| "eval_precision": 0.8303047246295778, | |
| "eval_recall": 0.7286555446516192, | |
| "eval_runtime": 447.7517, | |
| "eval_samples_per_second": 21.811, | |
| "eval_steps_per_second": 2.727, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.4619883040935673, | |
| "grad_norm": 2.7669053077697754, | |
| "learning_rate": 7.078947368421053e-06, | |
| "loss": 0.3398, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.830534507474913, | |
| "eval_f1": 0.792995622263915, | |
| "eval_loss": 0.4003181755542755, | |
| "eval_precision": 0.8088798162796632, | |
| "eval_recall": 0.7777232580961727, | |
| "eval_runtime": 384.6018, | |
| "eval_samples_per_second": 25.392, | |
| "eval_steps_per_second": 3.175, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.192982456140351, | |
| "grad_norm": 4.7090277671813965, | |
| "learning_rate": 5.616959064327486e-06, | |
| "loss": 0.3073, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.9239766081871346, | |
| "grad_norm": 2.2827835083007812, | |
| "learning_rate": 4.154970760233918e-06, | |
| "loss": 0.2834, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8345279541265616, | |
| "eval_f1": 0.795959595959596, | |
| "eval_loss": 0.4070938229560852, | |
| "eval_precision": 0.8199791883454735, | |
| "eval_recall": 0.7733071638861629, | |
| "eval_runtime": 619.2538, | |
| "eval_samples_per_second": 15.771, | |
| "eval_steps_per_second": 1.972, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 3.654970760233918, | |
| "grad_norm": 3.448413848876953, | |
| "learning_rate": 2.692982456140351e-06, | |
| "loss": 0.251, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8359614990784354, | |
| "eval_f1": 0.7973690867695421, | |
| "eval_loss": 0.43020206689834595, | |
| "eval_precision": 0.822976501305483, | |
| "eval_recall": 0.7733071638861629, | |
| "eval_runtime": 773.3763, | |
| "eval_samples_per_second": 12.628, | |
| "eval_steps_per_second": 1.579, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 4.385964912280702, | |
| "grad_norm": 2.972419500350952, | |
| "learning_rate": 1.2309941520467837e-06, | |
| "loss": 0.2379, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8349375383985255, | |
| "eval_f1": 0.7986006996501749, | |
| "eval_loss": 0.45096608996391296, | |
| "eval_precision": 0.8136456211812627, | |
| "eval_recall": 0.7841020608439647, | |
| "eval_runtime": 774.6507, | |
| "eval_samples_per_second": 12.607, | |
| "eval_steps_per_second": 1.576, | |
| "step": 3420 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 3420, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.982111352788608e+16, | |
| "train_batch_size": 100, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |