{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 375,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 3.0676674842834473,
      "learning_rate": 1.9520000000000003e-05,
      "loss": 1.1671,
      "step": 10
    },
    {
      "epoch": 0.16,
      "grad_norm": 2.477043628692627,
      "learning_rate": 1.898666666666667e-05,
      "loss": 0.737,
      "step": 20
    },
    {
      "epoch": 0.24,
      "grad_norm": 2.6543428897857666,
      "learning_rate": 1.8453333333333335e-05,
      "loss": 0.5566,
      "step": 30
    },
    {
      "epoch": 0.32,
      "grad_norm": 6.639544486999512,
      "learning_rate": 1.792e-05,
      "loss": 0.4373,
      "step": 40
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.2970216274261475,
      "learning_rate": 1.7386666666666667e-05,
      "loss": 0.3546,
      "step": 50
    },
    {
      "epoch": 0.48,
      "grad_norm": 5.090140342712402,
      "learning_rate": 1.6853333333333333e-05,
      "loss": 0.3553,
      "step": 60
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.696153163909912,
      "learning_rate": 1.632e-05,
      "loss": 0.3246,
      "step": 70
    },
    {
      "epoch": 0.64,
      "grad_norm": 5.513143062591553,
      "learning_rate": 1.578666666666667e-05,
      "loss": 0.3048,
      "step": 80
    },
    {
      "epoch": 0.72,
      "grad_norm": 5.173070430755615,
      "learning_rate": 1.5253333333333335e-05,
      "loss": 0.2941,
      "step": 90
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.507210731506348,
      "learning_rate": 1.4720000000000001e-05,
      "loss": 0.293,
      "step": 100
    },
    {
      "epoch": 0.88,
      "grad_norm": 4.429561138153076,
      "learning_rate": 1.418666666666667e-05,
      "loss": 0.283,
      "step": 110
    },
    {
      "epoch": 0.96,
      "grad_norm": 5.397491455078125,
      "learning_rate": 1.3653333333333334e-05,
      "loss": 0.2456,
      "step": 120
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8911995942176009,
      "eval_f1": 0.8068669527896994,
      "eval_loss": 0.2935342788696289,
      "eval_precision": 0.7782377291543465,
      "eval_recall": 0.8376830044557607,
      "eval_runtime": 1.1986,
      "eval_samples_per_second": 125.141,
      "eval_steps_per_second": 8.343,
      "step": 125
    },
    {
      "epoch": 1.04,
      "grad_norm": 4.7523274421691895,
      "learning_rate": 1.3120000000000001e-05,
      "loss": 0.27,
      "step": 130
    },
    {
      "epoch": 1.12,
      "grad_norm": 5.5811262130737305,
      "learning_rate": 1.2586666666666668e-05,
      "loss": 0.2433,
      "step": 140
    },
    {
      "epoch": 1.2,
      "grad_norm": 4.444863319396973,
      "learning_rate": 1.2053333333333335e-05,
      "loss": 0.2797,
      "step": 150
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.8088574409484863,
      "learning_rate": 1.152e-05,
      "loss": 0.2457,
      "step": 160
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 3.965343952178955,
      "learning_rate": 1.0986666666666668e-05,
      "loss": 0.2554,
      "step": 170
    },
    {
      "epoch": 1.44,
      "grad_norm": 5.77710485458374,
      "learning_rate": 1.0453333333333334e-05,
      "loss": 0.2369,
      "step": 180
    },
    {
      "epoch": 1.52,
      "grad_norm": 4.24776029586792,
      "learning_rate": 9.920000000000002e-06,
      "loss": 0.2475,
      "step": 190
    },
    {
      "epoch": 1.6,
      "grad_norm": 6.790882110595703,
      "learning_rate": 9.386666666666668e-06,
      "loss": 0.2885,
      "step": 200
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 2.6119062900543213,
      "learning_rate": 8.853333333333334e-06,
      "loss": 0.2386,
      "step": 210
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.789976119995117,
      "learning_rate": 8.32e-06,
      "loss": 0.2163,
      "step": 220
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 2.7082512378692627,
      "learning_rate": 7.786666666666666e-06,
      "loss": 0.2211,
      "step": 230
    },
    {
      "epoch": 1.92,
      "grad_norm": 4.595925807952881,
      "learning_rate": 7.253333333333335e-06,
      "loss": 0.2195,
      "step": 240
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.239081621170044,
      "learning_rate": 6.720000000000001e-06,
      "loss": 0.2221,
      "step": 250
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8984275932031448,
      "eval_f1": 0.8320817062180834,
      "eval_loss": 0.3072827160358429,
      "eval_precision": 0.7878270762229806,
      "eval_recall": 0.8816040738383195,
      "eval_runtime": 1.1612,
      "eval_samples_per_second": 129.177,
      "eval_steps_per_second": 8.612,
      "step": 250
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.049429178237915,
      "learning_rate": 6.186666666666668e-06,
      "loss": 0.1524,
      "step": 260
    },
    {
      "epoch": 2.16,
      "grad_norm": 3.8909575939178467,
      "learning_rate": 5.653333333333334e-06,
      "loss": 0.2572,
      "step": 270
    },
    {
      "epoch": 2.24,
      "grad_norm": 4.024440288543701,
      "learning_rate": 5.12e-06,
      "loss": 0.2083,
      "step": 280
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.7988221645355225,
      "learning_rate": 4.586666666666667e-06,
      "loss": 0.1893,
      "step": 290
    },
    {
      "epoch": 2.4,
      "grad_norm": 5.445796966552734,
      "learning_rate": 4.053333333333333e-06,
      "loss": 0.1918,
      "step": 300
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.8356993198394775,
      "learning_rate": 3.52e-06,
      "loss": 0.2006,
      "step": 310
    },
    {
      "epoch": 2.56,
      "grad_norm": 2.6401376724243164,
      "learning_rate": 2.986666666666667e-06,
      "loss": 0.1883,
      "step": 320
    },
    {
      "epoch": 2.64,
      "grad_norm": 3.2241694927215576,
      "learning_rate": 2.4533333333333333e-06,
      "loss": 0.2185,
      "step": 330
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 1.8776860237121582,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 0.1718,
      "step": 340
    },
    {
      "epoch": 2.8,
      "grad_norm": 1.8782577514648438,
      "learning_rate": 1.3866666666666668e-06,
      "loss": 0.2874,
      "step": 350
    },
    {
      "epoch": 2.88,
      "grad_norm": 3.1951863765716553,
      "learning_rate": 8.533333333333334e-07,
      "loss": 0.2036,
      "step": 360
    },
    {
      "epoch": 2.96,
      "grad_norm": 2.771134853363037,
      "learning_rate": 3.2e-07,
      "loss": 0.2104,
      "step": 370
    }
  ],
  "logging_steps": 10,
  "max_steps": 375,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 473453787413376.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}