| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 8.0, |
| "global_step": 976, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 3e-05, |
| "loss": 1.2088, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_exact_match": 69.24822109930906, |
| "eval_f1": 72.17452412502678, |
| "eval_runtime": 3.8492, |
| "eval_samples_per_second": 2519.253, |
| "eval_steps_per_second": 8.054, |
| "step": 122 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 3e-05, |
| "loss": 0.5358, |
| "step": 244 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_exact_match": 69.43384551923275, |
| "eval_f1": 72.81617455837493, |
| "eval_runtime": 3.3554, |
| "eval_samples_per_second": 2889.993, |
| "eval_steps_per_second": 9.239, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 3e-05, |
| "loss": 0.4775, |
| "step": 366 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_exact_match": 69.27915850262968, |
| "eval_f1": 72.85460519879189, |
| "eval_runtime": 3.3665, |
| "eval_samples_per_second": 2880.42, |
| "eval_steps_per_second": 9.208, |
| "step": 366 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 3e-05, |
| "loss": 0.4447, |
| "step": 488 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_exact_match": 70.48571723213365, |
| "eval_f1": 73.22340685107518, |
| "eval_runtime": 3.4091, |
| "eval_samples_per_second": 2844.408, |
| "eval_steps_per_second": 9.093, |
| "step": 488 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 3e-05, |
| "loss": 0.4137, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_exact_match": 69.38228318036506, |
| "eval_f1": 72.88562345993678, |
| "eval_runtime": 3.3777, |
| "eval_samples_per_second": 2870.854, |
| "eval_steps_per_second": 9.178, |
| "step": 610 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 3e-05, |
| "loss": 0.3915, |
| "step": 732 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_exact_match": 70.30009281220997, |
| "eval_f1": 73.2708621066116, |
| "eval_runtime": 3.344, |
| "eval_samples_per_second": 2899.797, |
| "eval_steps_per_second": 9.27, |
| "step": 732 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 3e-05, |
| "loss": 0.3681, |
| "step": 854 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_exact_match": 69.89790656904198, |
| "eval_f1": 72.65251695126908, |
| "eval_runtime": 3.3834, |
| "eval_samples_per_second": 2866.043, |
| "eval_steps_per_second": 9.162, |
| "step": 854 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 3e-05, |
| "loss": 0.3543, |
| "step": 976 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_exact_match": 70.3929050221718, |
| "eval_f1": 73.33998211153649, |
| "eval_runtime": 3.3955, |
| "eval_samples_per_second": 2855.838, |
| "eval_steps_per_second": 9.13, |
| "step": 976 |
| } |
| ], |
| "max_steps": 1220, |
| "num_train_epochs": 10, |
| "total_flos": 117053190045696.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|