| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.03225806451613, |
| "eval_steps": 200, |
| "global_step": 2800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.367741935483872e-06, |
| "loss": 0.1306, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.0050870636478066444, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 45.9581, |
| "eval_samples_per_second": 92.497, |
| "eval_steps_per_second": 2.894, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 8.722580645161291e-06, |
| "loss": 0.0056, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.0040680356323719025, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 46.1465, |
| "eval_samples_per_second": 92.12, |
| "eval_steps_per_second": 2.882, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 8.07741935483871e-06, |
| "loss": 0.0055, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.0037690529134124517, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 46.5369, |
| "eval_samples_per_second": 91.347, |
| "eval_steps_per_second": 2.858, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.4322580645161305e-06, |
| "loss": 0.0046, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.00336311012506485, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 46.6436, |
| "eval_samples_per_second": 91.138, |
| "eval_steps_per_second": 2.851, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.23, |
| "learning_rate": 6.787096774193549e-06, |
| "loss": 0.0036, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.23, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.0030723349191248417, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 46.1256, |
| "eval_samples_per_second": 92.161, |
| "eval_steps_per_second": 2.883, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 6.141935483870968e-06, |
| "loss": 0.0036, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.87, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.002934606047347188, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 45.9728, |
| "eval_samples_per_second": 92.468, |
| "eval_steps_per_second": 2.893, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 5.496774193548387e-06, |
| "loss": 0.0033, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.52, |
| "eval_accuracy": 0.9995035103822646, |
| "eval_f1": 0.9957530527888917, |
| "eval_loss": 0.003328441409394145, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9915420261462532, |
| "eval_runtime": 46.7329, |
| "eval_samples_per_second": 90.964, |
| "eval_steps_per_second": 2.846, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.16, |
| "learning_rate": 4.851612903225807e-06, |
| "loss": 0.0031, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.16, |
| "eval_accuracy": 0.9995028630685519, |
| "eval_f1": 0.9956028784597781, |
| "eval_loss": 0.0030518420971930027, |
| "eval_precision": 0.9995822652615589, |
| "eval_recall": 0.9916550502957465, |
| "eval_runtime": 46.3789, |
| "eval_samples_per_second": 91.658, |
| "eval_steps_per_second": 2.868, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.81, |
| "learning_rate": 4.2064516129032265e-06, |
| "loss": 0.0029, |
| "step": 1800 |
| }, |
| { |
| "epoch": 5.81, |
| "eval_accuracy": 0.9995009211274134, |
| "eval_f1": 0.9953768920234843, |
| "eval_loss": 0.002558706561103463, |
| "eval_precision": 0.9991458993679655, |
| "eval_recall": 0.9916362129374976, |
| "eval_runtime": 46.7775, |
| "eval_samples_per_second": 90.877, |
| "eval_steps_per_second": 2.843, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 3.5612903225806457e-06, |
| "loss": 0.0025, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.45, |
| "eval_accuracy": 0.9995086888919672, |
| "eval_f1": 0.995433444582061, |
| "eval_loss": 0.0025446824729442596, |
| "eval_precision": 0.999240741795266, |
| "eval_recall": 0.9916550502957465, |
| "eval_runtime": 45.9645, |
| "eval_samples_per_second": 92.484, |
| "eval_steps_per_second": 2.894, |
| "step": 2000 |
| }, |
| { |
| "epoch": 7.1, |
| "learning_rate": 2.916129032258065e-06, |
| "loss": 0.0025, |
| "step": 2200 |
| }, |
| { |
| "epoch": 7.1, |
| "eval_accuracy": 0.9994717920103363, |
| "eval_f1": 0.9940712393843137, |
| "eval_loss": 0.0025424922350794077, |
| "eval_precision": 0.9948120059614768, |
| "eval_recall": 0.9933315751798968, |
| "eval_runtime": 46.0813, |
| "eval_samples_per_second": 92.25, |
| "eval_steps_per_second": 2.886, |
| "step": 2200 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 2.270967741935484e-06, |
| "loss": 0.0022, |
| "step": 2400 |
| }, |
| { |
| "epoch": 7.74, |
| "eval_accuracy": 0.9994944479902851, |
| "eval_f1": 0.9944845142129827, |
| "eval_loss": 0.002427204977720976, |
| "eval_precision": 0.995488778572642, |
| "eval_recall": 0.9934822740458878, |
| "eval_runtime": 46.5385, |
| "eval_samples_per_second": 91.344, |
| "eval_steps_per_second": 2.858, |
| "step": 2400 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 1.6258064516129033e-06, |
| "loss": 0.0021, |
| "step": 2600 |
| }, |
| { |
| "epoch": 8.39, |
| "eval_accuracy": 0.9995028630685519, |
| "eval_f1": 0.9945796136914243, |
| "eval_loss": 0.0023249166551977396, |
| "eval_precision": 0.9954335314652326, |
| "eval_recall": 0.9937271597031232, |
| "eval_runtime": 46.9763, |
| "eval_samples_per_second": 90.492, |
| "eval_steps_per_second": 2.831, |
| "step": 2600 |
| }, |
| { |
| "epoch": 9.03, |
| "learning_rate": 9.806451612903227e-07, |
| "loss": 0.0019, |
| "step": 2800 |
| }, |
| { |
| "epoch": 9.03, |
| "eval_accuracy": 0.9995332868130545, |
| "eval_f1": 0.9949169645130566, |
| "eval_loss": 0.0022366372868418694, |
| "eval_precision": 0.996166411723604, |
| "eval_recall": 0.9936706476283766, |
| "eval_runtime": 46.0411, |
| "eval_samples_per_second": 92.331, |
| "eval_steps_per_second": 2.889, |
| "step": 2800 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 3100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 200, |
| "total_flos": 1.8556810970979564e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|