| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 1384, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 9.421965317919077e-06, | |
| "loss": 1.814, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.84393063583815e-06, | |
| "loss": 1.1008, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.265895953757226e-06, | |
| "loss": 0.8649, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.762, | |
| "eval_f1": 0.7443581909981544, | |
| "eval_loss": 0.7707217931747437, | |
| "eval_precision": 0.8007820246885388, | |
| "eval_recall": 0.7255360611559928, | |
| "eval_runtime": 10.5048, | |
| "eval_samples_per_second": 47.598, | |
| "eval_steps_per_second": 3.998, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 7.687861271676302e-06, | |
| "loss": 0.7136, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 7.109826589595377e-06, | |
| "loss": 0.6514, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 6.531791907514451e-06, | |
| "loss": 0.6135, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.784, | |
| "eval_f1": 0.7845289578609783, | |
| "eval_loss": 0.6748060584068298, | |
| "eval_precision": 0.7856247693531655, | |
| "eval_recall": 0.7918236543574249, | |
| "eval_runtime": 10.4972, | |
| "eval_samples_per_second": 47.632, | |
| "eval_steps_per_second": 4.001, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 5.9537572254335265e-06, | |
| "loss": 0.623, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 5.375722543352601e-06, | |
| "loss": 0.4996, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 4.797687861271676e-06, | |
| "loss": 0.4835, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 4.219653179190752e-06, | |
| "loss": 0.4899, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.79, | |
| "eval_f1": 0.7805436767451568, | |
| "eval_loss": 0.6808088421821594, | |
| "eval_precision": 0.7803281881073597, | |
| "eval_recall": 0.7887121939209444, | |
| "eval_runtime": 10.4962, | |
| "eval_samples_per_second": 47.636, | |
| "eval_steps_per_second": 4.001, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 3.641618497109827e-06, | |
| "loss": 0.4174, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 3.063583815028902e-06, | |
| "loss": 0.3569, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 2.485549132947977e-06, | |
| "loss": 0.3678, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.792, | |
| "eval_f1": 0.7970230838037085, | |
| "eval_loss": 0.7039059996604919, | |
| "eval_precision": 0.802843584794583, | |
| "eval_recall": 0.7992789320511213, | |
| "eval_runtime": 10.5145, | |
| "eval_samples_per_second": 47.554, | |
| "eval_steps_per_second": 3.994, | |
| "step": 1384 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 1730, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 5820434589204480.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |