| { |
| "best_metric": 6.576526641845703, |
| "best_model_checkpoint": "poetry-author-classifier/checkpoint-4960", |
| "epoch": 4.0, |
| "eval_steps": 500, |
| "global_step": 4960, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.2298387096774195e-06, |
| "loss": 7.1674, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.459677419354839e-06, |
| "loss": 7.1856, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.709677419354839e-06, |
| "loss": 7.1487, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.9596774193548395e-06, |
| "loss": 7.1494, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.2096774193548386e-06, |
| "loss": 7.1324, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.459677419354839e-06, |
| "loss": 7.1285, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 7.0996, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.939516129032258e-06, |
| "loss": 7.1009, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1189516129032258e-05, |
| "loss": 7.0898, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.2439516129032258e-05, |
| "loss": 7.0901, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.368951612903226e-05, |
| "loss": 7.0708, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.4939516129032258e-05, |
| "loss": 7.0838, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.6189516129032258e-05, |
| "loss": 7.0898, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 1.743951612903226e-05, |
| "loss": 7.0739, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.8689516129032257e-05, |
| "loss": 7.0675, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.993951612903226e-05, |
| "loss": 7.0634, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.1189516129032257e-05, |
| "loss": 7.0577, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.243951612903226e-05, |
| "loss": 7.0256, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.368951612903226e-05, |
| "loss": 7.0134, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.493951612903226e-05, |
| "loss": 6.9918, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.0125, |
| "eval_f1_macro": 0.00016048011392233344, |
| "eval_f1_micro": 0.012500000000000002, |
| "eval_f1_weighted": 0.001365188288259011, |
| "eval_loss": 6.936024188995361, |
| "eval_precision_macro": 8.910496729984587e-05, |
| "eval_precision_micro": 0.0125, |
| "eval_precision_weighted": 0.0007582916511114492, |
| "eval_recall_macro": 0.0015107166462090453, |
| "eval_recall_micro": 0.0125, |
| "eval_recall_weighted": 0.0125, |
| "eval_runtime": 5.6603, |
| "eval_samples_per_second": 438.142, |
| "eval_steps_per_second": 27.384, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 2.618951612903226e-05, |
| "loss": 6.9564, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 2.743951612903226e-05, |
| "loss": 6.8974, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 2.8689516129032257e-05, |
| "loss": 6.9109, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.9939516129032256e-05, |
| "loss": 6.9089, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.1189516129032256e-05, |
| "loss": 6.8398, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 3.241935483870968e-05, |
| "loss": 6.9353, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 3.366935483870968e-05, |
| "loss": 6.9405, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 3.48991935483871e-05, |
| "loss": 6.9203, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 3.61491935483871e-05, |
| "loss": 6.8844, |
| "step": 1798 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 3.7399193548387094e-05, |
| "loss": 6.8705, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 3.86491935483871e-05, |
| "loss": 6.8628, |
| "step": 1922 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 3.98991935483871e-05, |
| "loss": 6.9242, |
| "step": 1984 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.11491935483871e-05, |
| "loss": 6.9142, |
| "step": 2046 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 4.23991935483871e-05, |
| "loss": 6.8626, |
| "step": 2108 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 4.36491935483871e-05, |
| "loss": 6.9109, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 4.48991935483871e-05, |
| "loss": 6.8685, |
| "step": 2232 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 4.61491935483871e-05, |
| "loss": 6.9082, |
| "step": 2294 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 4.73991935483871e-05, |
| "loss": 6.8367, |
| "step": 2356 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 4.86491935483871e-05, |
| "loss": 6.8052, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4.98991935483871e-05, |
| "loss": 6.9271, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.01975806451612903, |
| "eval_f1_macro": 0.0005161676281010565, |
| "eval_f1_micro": 0.01975806451612903, |
| "eval_f1_weighted": 0.0032348889447503157, |
| "eval_loss": 6.7918314933776855, |
| "eval_precision_macro": 0.0003125183576995364, |
| "eval_precision_micro": 0.01975806451612903, |
| "eval_precision_weighted": 0.001930682521200322, |
| "eval_recall_macro": 0.0029791410946401033, |
| "eval_recall_micro": 0.01975806451612903, |
| "eval_recall_weighted": 0.01975806451612903, |
| "eval_runtime": 5.6711, |
| "eval_samples_per_second": 437.304, |
| "eval_steps_per_second": 27.332, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 4.987231182795699e-05, |
| "loss": 6.7229, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 4.9733422939068106e-05, |
| "loss": 6.6922, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 4.9594534050179215e-05, |
| "loss": 6.7156, |
| "step": 2666 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 4.9455645161290324e-05, |
| "loss": 6.7197, |
| "step": 2728 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 4.931675627240144e-05, |
| "loss": 6.6575, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 4.917786738351255e-05, |
| "loss": 6.7108, |
| "step": 2852 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 4.903897849462366e-05, |
| "loss": 6.6203, |
| "step": 2914 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 4.890008960573477e-05, |
| "loss": 6.6749, |
| "step": 2976 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 4.876120071684588e-05, |
| "loss": 6.6755, |
| "step": 3038 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 4.862231182795699e-05, |
| "loss": 6.65, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 4.84834229390681e-05, |
| "loss": 6.7578, |
| "step": 3162 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 4.834453405017921e-05, |
| "loss": 6.6982, |
| "step": 3224 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 4.820564516129033e-05, |
| "loss": 6.5956, |
| "step": 3286 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.806675627240144e-05, |
| "loss": 6.668, |
| "step": 3348 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.7927867383512546e-05, |
| "loss": 6.6552, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 4.778897849462366e-05, |
| "loss": 6.6823, |
| "step": 3472 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 4.765008960573477e-05, |
| "loss": 6.6094, |
| "step": 3534 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 4.751120071684588e-05, |
| "loss": 6.6157, |
| "step": 3596 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 4.737231182795699e-05, |
| "loss": 6.6346, |
| "step": 3658 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 4.72334229390681e-05, |
| "loss": 6.6196, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.031048387096774193, |
| "eval_f1_macro": 0.0024286702802756113, |
| "eval_f1_micro": 0.031048387096774193, |
| "eval_f1_weighted": 0.011223430962410517, |
| "eval_loss": 6.620409965515137, |
| "eval_precision_macro": 0.0018299977083098978, |
| "eval_precision_micro": 0.031048387096774193, |
| "eval_precision_weighted": 0.008385708572774478, |
| "eval_recall_macro": 0.005879374221598751, |
| "eval_recall_micro": 0.031048387096774193, |
| "eval_recall_weighted": 0.031048387096774193, |
| "eval_runtime": 5.6595, |
| "eval_samples_per_second": 438.199, |
| "eval_steps_per_second": 27.387, |
| "step": 3720 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 4.7094534050179215e-05, |
| "loss": 6.27, |
| "step": 3782 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 4.6955645161290324e-05, |
| "loss": 6.4106, |
| "step": 3844 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 4.6816756272401434e-05, |
| "loss": 6.2928, |
| "step": 3906 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 4.667786738351255e-05, |
| "loss": 6.4405, |
| "step": 3968 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 4.653897849462366e-05, |
| "loss": 6.3277, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 4.640008960573477e-05, |
| "loss": 6.3392, |
| "step": 4092 |
| }, |
| { |
| "epoch": 3.35, |
| "learning_rate": 4.6261200716845884e-05, |
| "loss": 6.302, |
| "step": 4154 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 4.6122311827956993e-05, |
| "loss": 6.3753, |
| "step": 4216 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 4.59834229390681e-05, |
| "loss": 6.2649, |
| "step": 4278 |
| }, |
| { |
| "epoch": 3.5, |
| "learning_rate": 4.584453405017922e-05, |
| "loss": 6.2694, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.55, |
| "learning_rate": 4.570564516129032e-05, |
| "loss": 6.3022, |
| "step": 4402 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 4.556675627240144e-05, |
| "loss": 6.4359, |
| "step": 4464 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 4.5427867383512546e-05, |
| "loss": 6.3682, |
| "step": 4526 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 4.5288978494623656e-05, |
| "loss": 6.2065, |
| "step": 4588 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 4.515008960573477e-05, |
| "loss": 6.4069, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 4.501120071684588e-05, |
| "loss": 6.2821, |
| "step": 4712 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 4.487231182795699e-05, |
| "loss": 6.1709, |
| "step": 4774 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 4.4733422939068106e-05, |
| "loss": 6.3291, |
| "step": 4836 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 4.4594534050179215e-05, |
| "loss": 6.2744, |
| "step": 4898 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 4.4455645161290325e-05, |
| "loss": 6.312, |
| "step": 4960 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.04395161290322581, |
| "eval_f1_macro": 0.006051911225519596, |
| "eval_f1_micro": 0.04395161290322581, |
| "eval_f1_weighted": 0.021178625226199167, |
| "eval_loss": 6.576526641845703, |
| "eval_precision_macro": 0.004909111507595384, |
| "eval_precision_micro": 0.04395161290322581, |
| "eval_precision_weighted": 0.01643333097832909, |
| "eval_recall_macro": 0.012797544111277469, |
| "eval_recall_micro": 0.04395161290322581, |
| "eval_recall_weighted": 0.04395161290322581, |
| "eval_runtime": 5.6977, |
| "eval_samples_per_second": 435.262, |
| "eval_steps_per_second": 27.204, |
| "step": 4960 |
| } |
| ], |
| "logging_steps": 62, |
| "max_steps": 24800, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 2638150224863232.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|