{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5004255319148936,
  "eval_steps": 50,
  "global_step": 588,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.00013539849850576911,
      "loss": 4.8195,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00017615750792387034,
      "loss": 2.2427,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 2.0574,
      "step": 30
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0001967741935483871,
      "loss": 2.0078,
      "step": 40
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0001931899641577061,
      "loss": 1.9905,
      "step": 50
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.6544462424008755,
      "eval_loss": 1.5293824672698975,
      "eval_runtime": 32.8926,
      "eval_samples_per_second": 61.047,
      "eval_steps_per_second": 0.243,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0001896057347670251,
      "loss": 1.9798,
      "step": 60
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.00018602150537634407,
      "loss": 1.9546,
      "step": 70
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0001824372759856631,
      "loss": 1.9234,
      "step": 80
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00017885304659498208,
      "loss": 1.929,
      "step": 90
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00017526881720430107,
      "loss": 1.907,
      "step": 100
    },
    {
      "epoch": 0.09,
      "eval_accuracy": 0.5062341056107924,
      "eval_loss": 3.5476841926574707,
      "eval_runtime": 32.158,
      "eval_samples_per_second": 62.442,
      "eval_steps_per_second": 0.249,
      "step": 100
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0001716845878136201,
      "loss": 1.8956,
      "step": 110
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.00016810035842293908,
      "loss": 1.8982,
      "step": 120
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00016451612903225807,
      "loss": 1.8738,
      "step": 130
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00016093189964157706,
      "loss": 1.8788,
      "step": 140
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.00015734767025089608,
      "loss": 1.8675,
      "step": 150
    },
    {
      "epoch": 0.13,
      "eval_accuracy": 0.5123508896963466,
      "eval_loss": 3.6367032527923584,
      "eval_runtime": 33.3455,
      "eval_samples_per_second": 60.218,
      "eval_steps_per_second": 0.24,
      "step": 150
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00015376344086021504,
      "loss": 1.8457,
      "step": 160
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00015017921146953406,
      "loss": 1.8669,
      "step": 170
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00014659498207885305,
      "loss": 1.8484,
      "step": 180
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00014301075268817205,
      "loss": 1.8312,
      "step": 190
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.00013942652329749104,
      "loss": 1.8359,
      "step": 200
    },
    {
      "epoch": 0.17,
      "eval_accuracy": 0.514210995704377,
      "eval_loss": 3.6230080127716064,
      "eval_runtime": 33.3113,
      "eval_samples_per_second": 60.28,
      "eval_steps_per_second": 0.24,
      "step": 200
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00013584229390681005,
      "loss": 1.8261,
      "step": 210
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00013225806451612905,
      "loss": 1.8447,
      "step": 220
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00012867383512544804,
      "loss": 1.836,
      "step": 230
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00012508960573476703,
      "loss": 1.8209,
      "step": 240
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00012150537634408603,
      "loss": 1.8103,
      "step": 250
    },
    {
      "epoch": 0.21,
      "eval_accuracy": 0.5280159907778466,
      "eval_loss": 3.4809513092041016,
      "eval_runtime": 33.3217,
      "eval_samples_per_second": 60.261,
      "eval_steps_per_second": 0.24,
      "step": 250
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00011792114695340501,
      "loss": 1.8104,
      "step": 260
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00011433691756272403,
      "loss": 1.8002,
      "step": 270
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.000110752688172043,
      "loss": 1.7979,
      "step": 280
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00010716845878136201,
      "loss": 1.7871,
      "step": 290
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.000103584229390681,
      "loss": 1.7905,
      "step": 300
    },
    {
      "epoch": 0.26,
      "eval_accuracy": 0.5359403052501626,
      "eval_loss": 3.469621419906616,
      "eval_runtime": 33.3288,
      "eval_samples_per_second": 60.248,
      "eval_steps_per_second": 0.24,
      "step": 300
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0001,
      "loss": 1.7734,
      "step": 310
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.6415770609319e-05,
      "loss": 1.7913,
      "step": 320
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.2831541218638e-05,
      "loss": 1.76,
      "step": 330
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.924731182795699e-05,
      "loss": 1.7728,
      "step": 340
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.566308243727598e-05,
      "loss": 1.7578,
      "step": 350
    },
    {
      "epoch": 0.3,
      "eval_accuracy": 0.5407767755955649,
      "eval_loss": 3.409736156463623,
      "eval_runtime": 32.2132,
      "eval_samples_per_second": 62.335,
      "eval_steps_per_second": 0.248,
      "step": 350
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.207885304659499e-05,
      "loss": 1.7371,
      "step": 360
    },
    {
      "epoch": 0.31,
      "learning_rate": 7.849462365591398e-05,
      "loss": 1.7593,
      "step": 370
    },
    {
      "epoch": 0.32,
      "learning_rate": 7.491039426523297e-05,
      "loss": 1.7437,
      "step": 380
    },
    {
      "epoch": 0.33,
      "learning_rate": 7.132616487455197e-05,
      "loss": 1.7414,
      "step": 390
    },
    {
      "epoch": 0.34,
      "learning_rate": 6.774193548387096e-05,
      "loss": 1.729,
      "step": 400
    },
    {
      "epoch": 0.34,
      "eval_accuracy": 0.5427147714128822,
      "eval_loss": 3.4043824672698975,
      "eval_runtime": 33.2117,
      "eval_samples_per_second": 60.461,
      "eval_steps_per_second": 0.241,
      "step": 400
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.415770609318996e-05,
      "loss": 1.7609,
      "step": 410
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.057347670250897e-05,
      "loss": 1.7368,
      "step": 420
    },
    {
      "epoch": 0.37,
      "learning_rate": 5.6989247311827965e-05,
      "loss": 1.7354,
      "step": 430
    },
    {
      "epoch": 0.37,
      "learning_rate": 5.340501792114696e-05,
      "loss": 1.7269,
      "step": 440
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.982078853046595e-05,
      "loss": 1.7308,
      "step": 450
    },
    {
      "epoch": 0.38,
      "eval_accuracy": 0.5484289625466852,
      "eval_loss": 3.3802289962768555,
      "eval_runtime": 32.0517,
      "eval_samples_per_second": 62.649,
      "eval_steps_per_second": 0.25,
      "step": 450
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.6236559139784944e-05,
      "loss": 1.7006,
      "step": 460
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.265232974910394e-05,
      "loss": 1.7124,
      "step": 470
    },
    {
      "epoch": 0.41,
      "learning_rate": 3.906810035842295e-05,
      "loss": 1.7225,
      "step": 480
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.548387096774194e-05,
      "loss": 1.7029,
      "step": 490
    },
    {
      "epoch": 0.43,
      "learning_rate": 3.1899641577060935e-05,
      "loss": 1.6896,
      "step": 500
    },
    {
      "epoch": 0.43,
      "eval_accuracy": 0.5529602022019449,
      "eval_loss": 3.345804214477539,
      "eval_runtime": 32.1251,
      "eval_samples_per_second": 62.506,
      "eval_steps_per_second": 0.249,
      "step": 500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.831541218637993e-05,
      "loss": 1.7141,
      "step": 510
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.4731182795698928e-05,
      "loss": 1.6969,
      "step": 520
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.1146953405017922e-05,
      "loss": 1.7087,
      "step": 530
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.7562724014336916e-05,
      "loss": 1.6779,
      "step": 540
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.3978494623655914e-05,
      "loss": 1.6721,
      "step": 550
    },
    {
      "epoch": 0.47,
      "eval_accuracy": 0.5576710752298722,
      "eval_loss": 3.291583776473999,
      "eval_runtime": 32.0142,
      "eval_samples_per_second": 62.722,
      "eval_steps_per_second": 0.25,
      "step": 550
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.039426523297491e-05,
      "loss": 1.6997,
      "step": 560
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.810035842293908e-06,
      "loss": 1.6679,
      "step": 570
    },
    {
      "epoch": 0.49,
      "learning_rate": 3.225806451612903e-06,
      "loss": 1.6949,
      "step": 580
    },
    {
      "epoch": 0.5,
      "step": 588,
      "total_flos": 6.465698107324629e+18,
      "train_loss": 1.8579450334821428,
      "train_runtime": 7071.4809,
      "train_samples_per_second": 21.266,
      "train_steps_per_second": 0.083
    }
  ],
  "logging_steps": 10,
  "max_steps": 588,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 6.465698107324629e+18,
  "trial_name": null,
  "trial_params": null
}