| { | |
| "best_metric": 0.7413793103448276, | |
| "best_model_checkpoint": "/nesi/nobackup/uoa04081/wxy/model/bert_ft/sft/epoch35_6_1/warmup_ratio_lrtype/schedule-0.3-constant_with_warmup-epoch40-lr-2e-5/checkpoint-476", | |
| "epoch": 28.0, | |
| "eval_steps": 500, | |
| "global_step": 476, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.034482758620689655, | |
| "eval_loss": 1.9626049995422363, | |
| "eval_runtime": 0.3947, | |
| "eval_samples_per_second": 146.932, | |
| "eval_steps_per_second": 5.067, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.8860366344451904, | |
| "eval_runtime": 0.3952, | |
| "eval_samples_per_second": 146.746, | |
| "eval_steps_per_second": 5.06, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.559799313545227, | |
| "eval_runtime": 0.3943, | |
| "eval_samples_per_second": 147.102, | |
| "eval_steps_per_second": 5.072, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.460962176322937, | |
| "eval_runtime": 0.3951, | |
| "eval_samples_per_second": 146.813, | |
| "eval_steps_per_second": 5.063, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.4569206237792969, | |
| "eval_runtime": 0.3956, | |
| "eval_samples_per_second": 146.594, | |
| "eval_steps_per_second": 5.055, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.4397205114364624, | |
| "eval_runtime": 0.3971, | |
| "eval_samples_per_second": 146.051, | |
| "eval_steps_per_second": 5.036, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.3534083366394043, | |
| "eval_runtime": 0.3961, | |
| "eval_samples_per_second": 146.439, | |
| "eval_steps_per_second": 5.05, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5689655172413793, | |
| "eval_loss": 1.2757465839385986, | |
| "eval_runtime": 0.3944, | |
| "eval_samples_per_second": 147.045, | |
| "eval_steps_per_second": 5.071, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.5862068965517241, | |
| "eval_loss": 1.1723886728286743, | |
| "eval_runtime": 0.3961, | |
| "eval_samples_per_second": 146.442, | |
| "eval_steps_per_second": 5.05, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6551724137931034, | |
| "eval_loss": 1.0931514501571655, | |
| "eval_runtime": 0.394, | |
| "eval_samples_per_second": 147.197, | |
| "eval_steps_per_second": 5.076, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.6206896551724138, | |
| "eval_loss": 1.0942814350128174, | |
| "eval_runtime": 0.3944, | |
| "eval_samples_per_second": 147.077, | |
| "eval_steps_per_second": 5.072, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.6896551724137931, | |
| "eval_loss": 1.0222851037979126, | |
| "eval_runtime": 0.395, | |
| "eval_samples_per_second": 146.846, | |
| "eval_steps_per_second": 5.064, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.6379310344827587, | |
| "eval_loss": 1.0476372241973877, | |
| "eval_runtime": 0.3945, | |
| "eval_samples_per_second": 147.003, | |
| "eval_steps_per_second": 5.069, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.6724137931034483, | |
| "eval_loss": 1.054601788520813, | |
| "eval_runtime": 0.3939, | |
| "eval_samples_per_second": 147.227, | |
| "eval_steps_per_second": 5.077, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.6896551724137931, | |
| "eval_loss": 1.0961247682571411, | |
| "eval_runtime": 0.3955, | |
| "eval_samples_per_second": 146.649, | |
| "eval_steps_per_second": 5.057, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.6551724137931034, | |
| "eval_loss": 1.135161280632019, | |
| "eval_runtime": 0.3944, | |
| "eval_samples_per_second": 147.048, | |
| "eval_steps_per_second": 5.071, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.6724137931034483, | |
| "eval_loss": 1.132529377937317, | |
| "eval_runtime": 0.3947, | |
| "eval_samples_per_second": 146.937, | |
| "eval_steps_per_second": 5.067, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.6896551724137931, | |
| "eval_loss": 1.1694955825805664, | |
| "eval_runtime": 0.3942, | |
| "eval_samples_per_second": 147.136, | |
| "eval_steps_per_second": 5.074, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.6206896551724138, | |
| "eval_loss": 1.2848495244979858, | |
| "eval_runtime": 0.3945, | |
| "eval_samples_per_second": 147.012, | |
| "eval_steps_per_second": 5.069, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5689655172413793, | |
| "eval_loss": 1.4167975187301636, | |
| "eval_runtime": 0.3944, | |
| "eval_samples_per_second": 147.046, | |
| "eval_steps_per_second": 5.071, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.6379310344827587, | |
| "eval_loss": 1.3460503816604614, | |
| "eval_runtime": 0.3949, | |
| "eval_samples_per_second": 146.89, | |
| "eval_steps_per_second": 5.065, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.6896551724137931, | |
| "eval_loss": 1.1948856115341187, | |
| "eval_runtime": 0.3941, | |
| "eval_samples_per_second": 147.152, | |
| "eval_steps_per_second": 5.074, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.6896551724137931, | |
| "eval_loss": 1.2731900215148926, | |
| "eval_runtime": 0.4055, | |
| "eval_samples_per_second": 143.034, | |
| "eval_steps_per_second": 4.932, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.6206896551724138, | |
| "eval_loss": 1.2199504375457764, | |
| "eval_runtime": 0.3945, | |
| "eval_samples_per_second": 147.003, | |
| "eval_steps_per_second": 5.069, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.5172413793103449, | |
| "eval_loss": 1.481724739074707, | |
| "eval_runtime": 0.3944, | |
| "eval_samples_per_second": 147.043, | |
| "eval_steps_per_second": 5.07, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7068965517241379, | |
| "eval_loss": 1.297101616859436, | |
| "eval_runtime": 0.3951, | |
| "eval_samples_per_second": 146.796, | |
| "eval_steps_per_second": 5.062, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.6551724137931034, | |
| "eval_loss": 1.3901363611221313, | |
| "eval_runtime": 0.394, | |
| "eval_samples_per_second": 147.193, | |
| "eval_steps_per_second": 5.076, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7413793103448276, | |
| "eval_loss": 1.278436541557312, | |
| "eval_runtime": 0.3946, | |
| "eval_samples_per_second": 146.998, | |
| "eval_steps_per_second": 5.069, | |
| "step": 476 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 680, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 3816334065868800.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |