| { |
| "best_global_step": 18496, |
| "best_metric": 0.914106447425357, |
| "best_model_checkpoint": "./results/checkpoint-18496", |
| "epoch": 16.0, |
| "eval_steps": 500, |
| "global_step": 18496, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 35730.43359375, |
| "learning_rate": 9.666955017301039e-05, |
| "loss": 0.0899, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_exact_match": 0.8427087840761575, |
| "eval_loss": 0.05666187405586243, |
| "eval_runtime": 178.4282, |
| "eval_samples_per_second": 25.904, |
| "eval_steps_per_second": 0.813, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 61059.6328125, |
| "learning_rate": 9.333621683967705e-05, |
| "loss": 0.0609, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_exact_match": 0.8658589355257464, |
| "eval_loss": 0.0455557182431221, |
| "eval_runtime": 178.7826, |
| "eval_samples_per_second": 25.853, |
| "eval_steps_per_second": 0.811, |
| "step": 2312 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 17563.07421875, |
| "learning_rate": 9.000288350634371e-05, |
| "loss": 0.0469, |
| "step": 3468 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_exact_match": 0.8764604067503245, |
| "eval_loss": 0.042877476662397385, |
| "eval_runtime": 181.9993, |
| "eval_samples_per_second": 25.396, |
| "eval_steps_per_second": 0.797, |
| "step": 3468 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 87660.9375, |
| "learning_rate": 8.666955017301039e-05, |
| "loss": 0.0385, |
| "step": 4624 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_exact_match": 0.8855473820856772, |
| "eval_loss": 0.04085667058825493, |
| "eval_runtime": 178.1605, |
| "eval_samples_per_second": 25.943, |
| "eval_steps_per_second": 0.814, |
| "step": 4624 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 39212.90625, |
| "learning_rate": 8.333621683967705e-05, |
| "loss": 0.0323, |
| "step": 5780 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_exact_match": 0.8942016443098226, |
| "eval_loss": 0.037444353103637695, |
| "eval_runtime": 179.3565, |
| "eval_samples_per_second": 25.77, |
| "eval_steps_per_second": 0.808, |
| "step": 5780 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 11288.9541015625, |
| "learning_rate": 8.000288350634371e-05, |
| "loss": 0.0273, |
| "step": 6936 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_exact_match": 0.8976633491994808, |
| "eval_loss": 0.0365648977458477, |
| "eval_runtime": 179.1368, |
| "eval_samples_per_second": 25.802, |
| "eval_steps_per_second": 0.809, |
| "step": 6936 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 190008.28125, |
| "learning_rate": 7.666955017301039e-05, |
| "loss": 0.0243, |
| "step": 8092 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_exact_match": 0.8993942016443098, |
| "eval_loss": 0.03789495304226875, |
| "eval_runtime": 180.1506, |
| "eval_samples_per_second": 25.656, |
| "eval_steps_per_second": 0.805, |
| "step": 8092 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 55407.90234375, |
| "learning_rate": 7.333621683967705e-05, |
| "loss": 0.021, |
| "step": 9248 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_exact_match": 0.9056685417568152, |
| "eval_loss": 0.03875722736120224, |
| "eval_runtime": 178.8401, |
| "eval_samples_per_second": 25.844, |
| "eval_steps_per_second": 0.811, |
| "step": 9248 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 20677.572265625, |
| "learning_rate": 7.000288350634371e-05, |
| "loss": 0.0189, |
| "step": 10404 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_exact_match": 0.9078321073128516, |
| "eval_loss": 0.03903103992342949, |
| "eval_runtime": 181.7261, |
| "eval_samples_per_second": 25.434, |
| "eval_steps_per_second": 0.798, |
| "step": 10404 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 33437.46484375, |
| "learning_rate": 6.666955017301039e-05, |
| "loss": 0.0166, |
| "step": 11560 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_exact_match": 0.9071830376460407, |
| "eval_loss": 0.04076966270804405, |
| "eval_runtime": 177.5683, |
| "eval_samples_per_second": 26.029, |
| "eval_steps_per_second": 0.817, |
| "step": 11560 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 23686.2890625, |
| "learning_rate": 6.333621683967705e-05, |
| "loss": 0.0151, |
| "step": 12716 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_exact_match": 0.9102120294244915, |
| "eval_loss": 0.04429745301604271, |
| "eval_runtime": 174.7412, |
| "eval_samples_per_second": 26.451, |
| "eval_steps_per_second": 0.83, |
| "step": 12716 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 8527.7021484375, |
| "learning_rate": 6.000288350634372e-05, |
| "loss": 0.0133, |
| "step": 13872 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_exact_match": 0.9110774556469061, |
| "eval_loss": 0.04824285954236984, |
| "eval_runtime": 184.9616, |
| "eval_samples_per_second": 24.989, |
| "eval_steps_per_second": 0.784, |
| "step": 13872 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 5270.71826171875, |
| "learning_rate": 5.666955017301039e-05, |
| "loss": 0.0123, |
| "step": 15028 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_exact_match": 0.9073993942016443, |
| "eval_loss": 0.04638659209012985, |
| "eval_runtime": 176.014, |
| "eval_samples_per_second": 26.259, |
| "eval_steps_per_second": 0.824, |
| "step": 15028 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 15770.6142578125, |
| "learning_rate": 5.3336216839677045e-05, |
| "loss": 0.0112, |
| "step": 16184 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_exact_match": 0.9067503245348334, |
| "eval_loss": 0.05095385015010834, |
| "eval_runtime": 176.9305, |
| "eval_samples_per_second": 26.123, |
| "eval_steps_per_second": 0.82, |
| "step": 16184 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 2677.674560546875, |
| "learning_rate": 5.0002883506343714e-05, |
| "loss": 0.0106, |
| "step": 17340 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_exact_match": 0.9119428818693206, |
| "eval_loss": 0.050192590802907944, |
| "eval_runtime": 175.5562, |
| "eval_samples_per_second": 26.328, |
| "eval_steps_per_second": 0.826, |
| "step": 17340 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 109526.6953125, |
| "learning_rate": 4.6669550173010384e-05, |
| "loss": 0.0097, |
| "step": 18496 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_exact_match": 0.914106447425357, |
| "eval_loss": 0.05421430617570877, |
| "eval_runtime": 175.3249, |
| "eval_samples_per_second": 26.362, |
| "eval_steps_per_second": 0.827, |
| "step": 18496 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 34680, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 30, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 10, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.634418345880166e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|