| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 7.936, |
| "global_step": 496, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 2.1204, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 2.1463, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 1.8788, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.4e-05, |
| "loss": 1.6339, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 1.4717, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4e-05, |
| "loss": 1.2297, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.8e-05, |
| "loss": 1.008, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.9327354260089685e-05, |
| "loss": 0.8316, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_Macro F1": 0.7014511124514329, |
| "eval_Macro Precision": 0.6819163828588641, |
| "eval_Macro Recall": 0.7429670943209584, |
| "eval_Micro F1": 0.743, |
| "eval_Micro Precision": 0.743, |
| "eval_Micro Recall": 0.743, |
| "eval_Weighted F1": 0.702015038120142, |
| "eval_Weighted Precision": 0.6827499598363416, |
| "eval_Weighted Recall": 0.743, |
| "eval_accuracy": 0.743, |
| "eval_loss": 0.7518972158432007, |
| "eval_runtime": 447.9624, |
| "eval_samples_per_second": 4.465, |
| "eval_steps_per_second": 0.141, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.8430493273542606e-05, |
| "loss": 0.751, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.7533632286995514e-05, |
| "loss": 0.6157, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 4.6636771300448435e-05, |
| "loss": 0.534, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 4.573991031390134e-05, |
| "loss": 0.4818, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 4.4843049327354265e-05, |
| "loss": 0.4119, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 4.394618834080718e-05, |
| "loss": 0.3738, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 4.3049327354260094e-05, |
| "loss": 0.3613, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 4.215246636771301e-05, |
| "loss": 0.3561, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_Macro F1": 0.9399641065415552, |
| "eval_Macro Precision": 0.9480023804705043, |
| "eval_Macro Recall": 0.9394177169921687, |
| "eval_Micro F1": 0.9395, |
| "eval_Micro Precision": 0.9395, |
| "eval_Micro Recall": 0.9395, |
| "eval_Weighted F1": 0.9400723727432211, |
| "eval_Weighted Precision": 0.9482020576131688, |
| "eval_Weighted Recall": 0.9395, |
| "eval_accuracy": 0.9395, |
| "eval_loss": 0.23021972179412842, |
| "eval_runtime": 447.7687, |
| "eval_samples_per_second": 4.467, |
| "eval_steps_per_second": 0.141, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.125560538116592e-05, |
| "loss": 0.343, |
| "step": 128 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 4.035874439461884e-05, |
| "loss": 0.2871, |
| "step": 136 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 3.9461883408071745e-05, |
| "loss": 0.2976, |
| "step": 144 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 3.8565022421524667e-05, |
| "loss": 0.2644, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 3.766816143497758e-05, |
| "loss": 0.2489, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 3.6771300448430496e-05, |
| "loss": 0.2646, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.587443946188341e-05, |
| "loss": 0.2206, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 3.4977578475336325e-05, |
| "loss": 0.2222, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_Macro F1": 0.9560784374426261, |
| "eval_Macro Precision": 0.960026790280936, |
| "eval_Macro Recall": 0.9551284727306868, |
| "eval_Micro F1": 0.956, |
| "eval_Micro Precision": 0.956, |
| "eval_Micro Recall": 0.956, |
| "eval_Weighted F1": 0.9564097977894885, |
| "eval_Weighted Precision": 0.9597888158665016, |
| "eval_Weighted Recall": 0.956, |
| "eval_accuracy": 0.956, |
| "eval_loss": 0.1349564790725708, |
| "eval_runtime": 446.9116, |
| "eval_samples_per_second": 4.475, |
| "eval_steps_per_second": 0.141, |
| "step": 187 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 3.408071748878924e-05, |
| "loss": 0.2258, |
| "step": 192 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 3.3183856502242154e-05, |
| "loss": 0.207, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 3.228699551569507e-05, |
| "loss": 0.1826, |
| "step": 208 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 3.139013452914798e-05, |
| "loss": 0.1929, |
| "step": 216 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 3.0493273542600898e-05, |
| "loss": 0.2159, |
| "step": 224 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 2.9596412556053816e-05, |
| "loss": 0.1813, |
| "step": 232 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 2.8699551569506727e-05, |
| "loss": 0.1723, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 2.7802690582959645e-05, |
| "loss": 0.1705, |
| "step": 248 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_Macro F1": 0.9725373484087296, |
| "eval_Macro Precision": 0.9740041726370726, |
| "eval_Macro Recall": 0.9721459242124089, |
| "eval_Micro F1": 0.9725, |
| "eval_Micro Precision": 0.9725, |
| "eval_Micro Recall": 0.9725, |
| "eval_Weighted F1": 0.9727108492700939, |
| "eval_Weighted Precision": 0.9739894365164001, |
| "eval_Weighted Recall": 0.9725, |
| "eval_accuracy": 0.9725, |
| "eval_loss": 0.0872766375541687, |
| "eval_runtime": 435.148, |
| "eval_samples_per_second": 4.596, |
| "eval_steps_per_second": 0.145, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 2.6905829596412556e-05, |
| "loss": 0.1824, |
| "step": 256 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 2.600896860986547e-05, |
| "loss": 0.1877, |
| "step": 264 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 2.511210762331839e-05, |
| "loss": 0.2047, |
| "step": 272 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 2.4215246636771303e-05, |
| "loss": 0.1814, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 2.3318385650224218e-05, |
| "loss": 0.1396, |
| "step": 288 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 2.2421524663677132e-05, |
| "loss": 0.1233, |
| "step": 296 |
| }, |
| { |
| "epoch": 4.86, |
| "learning_rate": 2.1524663677130047e-05, |
| "loss": 0.1612, |
| "step": 304 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 2.062780269058296e-05, |
| "loss": 0.1541, |
| "step": 312 |
| }, |
| { |
| "epoch": 4.99, |
| "eval_Macro F1": 0.9823741759080236, |
| "eval_Macro Precision": 0.9829774434613384, |
| "eval_Macro Recall": 0.9821795541998369, |
| "eval_Micro F1": 0.9825, |
| "eval_Micro Precision": 0.9825, |
| "eval_Micro Recall": 0.9825, |
| "eval_Weighted F1": 0.982525654398398, |
| "eval_Weighted Precision": 0.9829534667560904, |
| "eval_Weighted Recall": 0.9825, |
| "eval_accuracy": 0.9825, |
| "eval_loss": 0.06422679126262665, |
| "eval_runtime": 424.7541, |
| "eval_samples_per_second": 4.709, |
| "eval_steps_per_second": 0.148, |
| "step": 312 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 1.9730941704035873e-05, |
| "loss": 0.137, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 1.883408071748879e-05, |
| "loss": 0.1577, |
| "step": 328 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 1.7937219730941705e-05, |
| "loss": 0.1378, |
| "step": 336 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 1.704035874439462e-05, |
| "loss": 0.1795, |
| "step": 344 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 1.6143497757847534e-05, |
| "loss": 0.1447, |
| "step": 352 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 1.5246636771300449e-05, |
| "loss": 0.1235, |
| "step": 360 |
| }, |
| { |
| "epoch": 5.89, |
| "learning_rate": 1.4349775784753363e-05, |
| "loss": 0.1253, |
| "step": 368 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_Macro F1": 0.991431216491566, |
| "eval_Macro Precision": 0.9916198483282233, |
| "eval_Macro Recall": 0.9913450460193864, |
| "eval_Micro F1": 0.9915, |
| "eval_Micro Precision": 0.9915, |
| "eval_Micro Recall": 0.9915, |
| "eval_Weighted F1": 0.9915159731866887, |
| "eval_Weighted Precision": 0.9916339514381117, |
| "eval_Weighted Recall": 0.9915, |
| "eval_accuracy": 0.9915, |
| "eval_loss": 0.033043112605810165, |
| "eval_runtime": 424.2354, |
| "eval_samples_per_second": 4.714, |
| "eval_steps_per_second": 0.149, |
| "step": 375 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 1.3452914798206278e-05, |
| "loss": 0.1134, |
| "step": 376 |
| }, |
| { |
| "epoch": 6.14, |
| "learning_rate": 1.2556053811659194e-05, |
| "loss": 0.1393, |
| "step": 384 |
| }, |
| { |
| "epoch": 6.27, |
| "learning_rate": 1.1659192825112109e-05, |
| "loss": 0.0912, |
| "step": 392 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 1.0762331838565023e-05, |
| "loss": 0.1131, |
| "step": 400 |
| }, |
| { |
| "epoch": 6.53, |
| "learning_rate": 9.865470852017936e-06, |
| "loss": 0.1255, |
| "step": 408 |
| }, |
| { |
| "epoch": 6.66, |
| "learning_rate": 8.968609865470853e-06, |
| "loss": 0.1418, |
| "step": 416 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 8.071748878923767e-06, |
| "loss": 0.1399, |
| "step": 424 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 7.174887892376682e-06, |
| "loss": 0.1196, |
| "step": 432 |
| }, |
| { |
| "epoch": 6.99, |
| "eval_Macro F1": 0.9820075650260679, |
| "eval_Macro Precision": 0.983170950573056, |
| "eval_Macro Recall": 0.9817144393341324, |
| "eval_Micro F1": 0.982, |
| "eval_Micro Precision": 0.982, |
| "eval_Micro Recall": 0.982, |
| "eval_Weighted F1": 0.9821570480740702, |
| "eval_Weighted Precision": 0.9831749197494307, |
| "eval_Weighted Recall": 0.982, |
| "eval_accuracy": 0.982, |
| "eval_loss": 0.05244705080986023, |
| "eval_runtime": 424.1947, |
| "eval_samples_per_second": 4.715, |
| "eval_steps_per_second": 0.149, |
| "step": 437 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 6.278026905829597e-06, |
| "loss": 0.1201, |
| "step": 440 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 5.381165919282512e-06, |
| "loss": 0.1111, |
| "step": 448 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 4.484304932735426e-06, |
| "loss": 0.1021, |
| "step": 456 |
| }, |
| { |
| "epoch": 7.42, |
| "learning_rate": 3.587443946188341e-06, |
| "loss": 0.1158, |
| "step": 464 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 2.690582959641256e-06, |
| "loss": 0.1321, |
| "step": 472 |
| }, |
| { |
| "epoch": 7.68, |
| "learning_rate": 1.7937219730941704e-06, |
| "loss": 0.1429, |
| "step": 480 |
| }, |
| { |
| "epoch": 7.81, |
| "learning_rate": 8.968609865470852e-07, |
| "loss": 0.1103, |
| "step": 488 |
| }, |
| { |
| "epoch": 7.94, |
| "learning_rate": 0.0, |
| "loss": 0.0896, |
| "step": 496 |
| }, |
| { |
| "epoch": 7.94, |
| "eval_Macro F1": 0.9863352307981634, |
| "eval_Macro Precision": 0.9869554360352077, |
| "eval_Macro Recall": 0.986134153009335, |
| "eval_Micro F1": 0.9865, |
| "eval_Micro Precision": 0.9865, |
| "eval_Micro Recall": 0.9865, |
| "eval_Weighted F1": 0.9865091518400995, |
| "eval_Weighted Precision": 0.9869273604184196, |
| "eval_Weighted Recall": 0.9865, |
| "eval_accuracy": 0.9865, |
| "eval_loss": 0.04359065368771553, |
| "eval_runtime": 424.3089, |
| "eval_samples_per_second": 4.714, |
| "eval_steps_per_second": 0.148, |
| "step": 496 |
| }, |
| { |
| "epoch": 7.94, |
| "step": 496, |
| "total_flos": 4.920648490788323e+18, |
| "train_loss": 0.35526800215724974, |
| "train_runtime": 45263.0947, |
| "train_samples_per_second": 1.414, |
| "train_steps_per_second": 0.011 |
| } |
| ], |
| "max_steps": 496, |
| "num_train_epochs": 8, |
| "total_flos": 4.920648490788323e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|