| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.936, | |
| "global_step": 496, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 2.1204, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 2.1463, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 1.8788, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.6339, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 1.4717, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4e-05, | |
| "loss": 1.2297, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.8e-05, | |
| "loss": 1.008, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.9327354260089685e-05, | |
| "loss": 0.8316, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_Macro F1": 0.7014511124514329, | |
| "eval_Macro Precision": 0.6819163828588641, | |
| "eval_Macro Recall": 0.7429670943209584, | |
| "eval_Micro F1": 0.743, | |
| "eval_Micro Precision": 0.743, | |
| "eval_Micro Recall": 0.743, | |
| "eval_Weighted F1": 0.702015038120142, | |
| "eval_Weighted Precision": 0.6827499598363416, | |
| "eval_Weighted Recall": 0.743, | |
| "eval_accuracy": 0.743, | |
| "eval_loss": 0.7518972158432007, | |
| "eval_runtime": 447.9624, | |
| "eval_samples_per_second": 4.465, | |
| "eval_steps_per_second": 0.141, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.8430493273542606e-05, | |
| "loss": 0.751, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.7533632286995514e-05, | |
| "loss": 0.6157, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.6636771300448435e-05, | |
| "loss": 0.534, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.573991031390134e-05, | |
| "loss": 0.4818, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.4843049327354265e-05, | |
| "loss": 0.4119, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 4.394618834080718e-05, | |
| "loss": 0.3738, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 4.3049327354260094e-05, | |
| "loss": 0.3613, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.215246636771301e-05, | |
| "loss": 0.3561, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_Macro F1": 0.9399641065415552, | |
| "eval_Macro Precision": 0.9480023804705043, | |
| "eval_Macro Recall": 0.9394177169921687, | |
| "eval_Micro F1": 0.9395, | |
| "eval_Micro Precision": 0.9395, | |
| "eval_Micro Recall": 0.9395, | |
| "eval_Weighted F1": 0.9400723727432211, | |
| "eval_Weighted Precision": 0.9482020576131688, | |
| "eval_Weighted Recall": 0.9395, | |
| "eval_accuracy": 0.9395, | |
| "eval_loss": 0.23021972179412842, | |
| "eval_runtime": 447.7687, | |
| "eval_samples_per_second": 4.467, | |
| "eval_steps_per_second": 0.141, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.125560538116592e-05, | |
| "loss": 0.343, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 4.035874439461884e-05, | |
| "loss": 0.2871, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 3.9461883408071745e-05, | |
| "loss": 0.2976, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 3.8565022421524667e-05, | |
| "loss": 0.2644, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.766816143497758e-05, | |
| "loss": 0.2489, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.6771300448430496e-05, | |
| "loss": 0.2646, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.587443946188341e-05, | |
| "loss": 0.2206, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 3.4977578475336325e-05, | |
| "loss": 0.2222, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_Macro F1": 0.9560784374426261, | |
| "eval_Macro Precision": 0.960026790280936, | |
| "eval_Macro Recall": 0.9551284727306868, | |
| "eval_Micro F1": 0.956, | |
| "eval_Micro Precision": 0.956, | |
| "eval_Micro Recall": 0.956, | |
| "eval_Weighted F1": 0.9564097977894885, | |
| "eval_Weighted Precision": 0.9597888158665016, | |
| "eval_Weighted Recall": 0.956, | |
| "eval_accuracy": 0.956, | |
| "eval_loss": 0.1349564790725708, | |
| "eval_runtime": 446.9116, | |
| "eval_samples_per_second": 4.475, | |
| "eval_steps_per_second": 0.141, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 3.408071748878924e-05, | |
| "loss": 0.2258, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 3.3183856502242154e-05, | |
| "loss": 0.207, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 3.228699551569507e-05, | |
| "loss": 0.1826, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 3.139013452914798e-05, | |
| "loss": 0.1929, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 3.0493273542600898e-05, | |
| "loss": 0.2159, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 3.71, | |
| "learning_rate": 2.9596412556053816e-05, | |
| "loss": 0.1813, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 2.8699551569506727e-05, | |
| "loss": 0.1723, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 2.7802690582959645e-05, | |
| "loss": 0.1705, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_Macro F1": 0.9725373484087296, | |
| "eval_Macro Precision": 0.9740041726370726, | |
| "eval_Macro Recall": 0.9721459242124089, | |
| "eval_Micro F1": 0.9725, | |
| "eval_Micro Precision": 0.9725, | |
| "eval_Micro Recall": 0.9725, | |
| "eval_Weighted F1": 0.9727108492700939, | |
| "eval_Weighted Precision": 0.9739894365164001, | |
| "eval_Weighted Recall": 0.9725, | |
| "eval_accuracy": 0.9725, | |
| "eval_loss": 0.0872766375541687, | |
| "eval_runtime": 435.148, | |
| "eval_samples_per_second": 4.596, | |
| "eval_steps_per_second": 0.145, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 2.6905829596412556e-05, | |
| "loss": 0.1824, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 2.600896860986547e-05, | |
| "loss": 0.1877, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 2.511210762331839e-05, | |
| "loss": 0.2047, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 2.4215246636771303e-05, | |
| "loss": 0.1814, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 2.3318385650224218e-05, | |
| "loss": 0.1396, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 2.2421524663677132e-05, | |
| "loss": 0.1233, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 2.1524663677130047e-05, | |
| "loss": 0.1612, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.062780269058296e-05, | |
| "loss": 0.1541, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_Macro F1": 0.9823741759080236, | |
| "eval_Macro Precision": 0.9829774434613384, | |
| "eval_Macro Recall": 0.9821795541998369, | |
| "eval_Micro F1": 0.9825, | |
| "eval_Micro Precision": 0.9825, | |
| "eval_Micro Recall": 0.9825, | |
| "eval_Weighted F1": 0.982525654398398, | |
| "eval_Weighted Precision": 0.9829534667560904, | |
| "eval_Weighted Recall": 0.9825, | |
| "eval_accuracy": 0.9825, | |
| "eval_loss": 0.06422679126262665, | |
| "eval_runtime": 424.7541, | |
| "eval_samples_per_second": 4.709, | |
| "eval_steps_per_second": 0.148, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 1.9730941704035873e-05, | |
| "loss": 0.137, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 1.883408071748879e-05, | |
| "loss": 0.1577, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 1.7937219730941705e-05, | |
| "loss": 0.1378, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 1.704035874439462e-05, | |
| "loss": 0.1795, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 1.6143497757847534e-05, | |
| "loss": 0.1447, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.5246636771300449e-05, | |
| "loss": 0.1235, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 1.4349775784753363e-05, | |
| "loss": 0.1253, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_Macro F1": 0.991431216491566, | |
| "eval_Macro Precision": 0.9916198483282233, | |
| "eval_Macro Recall": 0.9913450460193864, | |
| "eval_Micro F1": 0.9915, | |
| "eval_Micro Precision": 0.9915, | |
| "eval_Micro Recall": 0.9915, | |
| "eval_Weighted F1": 0.9915159731866887, | |
| "eval_Weighted Precision": 0.9916339514381117, | |
| "eval_Weighted Recall": 0.9915, | |
| "eval_accuracy": 0.9915, | |
| "eval_loss": 0.033043112605810165, | |
| "eval_runtime": 424.2354, | |
| "eval_samples_per_second": 4.714, | |
| "eval_steps_per_second": 0.149, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.3452914798206278e-05, | |
| "loss": 0.1134, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 1.2556053811659194e-05, | |
| "loss": 0.1393, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 1.1659192825112109e-05, | |
| "loss": 0.0912, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 1.0762331838565023e-05, | |
| "loss": 0.1131, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 9.865470852017936e-06, | |
| "loss": 0.1255, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 8.968609865470853e-06, | |
| "loss": 0.1418, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 8.071748878923767e-06, | |
| "loss": 0.1399, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 7.174887892376682e-06, | |
| "loss": 0.1196, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_Macro F1": 0.9820075650260679, | |
| "eval_Macro Precision": 0.983170950573056, | |
| "eval_Macro Recall": 0.9817144393341324, | |
| "eval_Micro F1": 0.982, | |
| "eval_Micro Precision": 0.982, | |
| "eval_Micro Recall": 0.982, | |
| "eval_Weighted F1": 0.9821570480740702, | |
| "eval_Weighted Precision": 0.9831749197494307, | |
| "eval_Weighted Recall": 0.982, | |
| "eval_accuracy": 0.982, | |
| "eval_loss": 0.05244705080986023, | |
| "eval_runtime": 424.1947, | |
| "eval_samples_per_second": 4.715, | |
| "eval_steps_per_second": 0.149, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 6.278026905829597e-06, | |
| "loss": 0.1201, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 5.381165919282512e-06, | |
| "loss": 0.1111, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 4.484304932735426e-06, | |
| "loss": 0.1021, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 3.587443946188341e-06, | |
| "loss": 0.1158, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 2.690582959641256e-06, | |
| "loss": 0.1321, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 1.7937219730941704e-06, | |
| "loss": 0.1429, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "learning_rate": 8.968609865470852e-07, | |
| "loss": 0.1103, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 0.0, | |
| "loss": 0.0896, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "eval_Macro F1": 0.9863352307981634, | |
| "eval_Macro Precision": 0.9869554360352077, | |
| "eval_Macro Recall": 0.986134153009335, | |
| "eval_Micro F1": 0.9865, | |
| "eval_Micro Precision": 0.9865, | |
| "eval_Micro Recall": 0.9865, | |
| "eval_Weighted F1": 0.9865091518400995, | |
| "eval_Weighted Precision": 0.9869273604184196, | |
| "eval_Weighted Recall": 0.9865, | |
| "eval_accuracy": 0.9865, | |
| "eval_loss": 0.04359065368771553, | |
| "eval_runtime": 424.3089, | |
| "eval_samples_per_second": 4.714, | |
| "eval_steps_per_second": 0.148, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "step": 496, | |
| "total_flos": 4.920648490788323e+18, | |
| "train_loss": 0.35526800215724974, | |
| "train_runtime": 45263.0947, | |
| "train_samples_per_second": 1.414, | |
| "train_steps_per_second": 0.011 | |
| } | |
| ], | |
| "max_steps": 496, | |
| "num_train_epochs": 8, | |
| "total_flos": 4.920648490788323e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |