dvs's picture
Upload folder using huggingface_hub
bf5b2cb verified
{
"best_metric": 6.576526641845703,
"best_model_checkpoint": "poetry-author-classifier/checkpoint-4960",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 4960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 1.2298387096774195e-06,
"loss": 7.1674,
"step": 62
},
{
"epoch": 0.1,
"learning_rate": 2.459677419354839e-06,
"loss": 7.1856,
"step": 124
},
{
"epoch": 0.15,
"learning_rate": 3.709677419354839e-06,
"loss": 7.1487,
"step": 186
},
{
"epoch": 0.2,
"learning_rate": 4.9596774193548395e-06,
"loss": 7.1494,
"step": 248
},
{
"epoch": 0.25,
"learning_rate": 6.2096774193548386e-06,
"loss": 7.1324,
"step": 310
},
{
"epoch": 0.3,
"learning_rate": 7.459677419354839e-06,
"loss": 7.1285,
"step": 372
},
{
"epoch": 0.35,
"learning_rate": 8.70967741935484e-06,
"loss": 7.0996,
"step": 434
},
{
"epoch": 0.4,
"learning_rate": 9.939516129032258e-06,
"loss": 7.1009,
"step": 496
},
{
"epoch": 0.45,
"learning_rate": 1.1189516129032258e-05,
"loss": 7.0898,
"step": 558
},
{
"epoch": 0.5,
"learning_rate": 1.2439516129032258e-05,
"loss": 7.0901,
"step": 620
},
{
"epoch": 0.55,
"learning_rate": 1.368951612903226e-05,
"loss": 7.0708,
"step": 682
},
{
"epoch": 0.6,
"learning_rate": 1.4939516129032258e-05,
"loss": 7.0838,
"step": 744
},
{
"epoch": 0.65,
"learning_rate": 1.6189516129032258e-05,
"loss": 7.0898,
"step": 806
},
{
"epoch": 0.7,
"learning_rate": 1.743951612903226e-05,
"loss": 7.0739,
"step": 868
},
{
"epoch": 0.75,
"learning_rate": 1.8689516129032257e-05,
"loss": 7.0675,
"step": 930
},
{
"epoch": 0.8,
"learning_rate": 1.993951612903226e-05,
"loss": 7.0634,
"step": 992
},
{
"epoch": 0.85,
"learning_rate": 2.1189516129032257e-05,
"loss": 7.0577,
"step": 1054
},
{
"epoch": 0.9,
"learning_rate": 2.243951612903226e-05,
"loss": 7.0256,
"step": 1116
},
{
"epoch": 0.95,
"learning_rate": 2.368951612903226e-05,
"loss": 7.0134,
"step": 1178
},
{
"epoch": 1.0,
"learning_rate": 2.493951612903226e-05,
"loss": 6.9918,
"step": 1240
},
{
"epoch": 1.0,
"eval_accuracy": 0.0125,
"eval_f1_macro": 0.00016048011392233344,
"eval_f1_micro": 0.012500000000000002,
"eval_f1_weighted": 0.001365188288259011,
"eval_loss": 6.936024188995361,
"eval_precision_macro": 8.910496729984587e-05,
"eval_precision_micro": 0.0125,
"eval_precision_weighted": 0.0007582916511114492,
"eval_recall_macro": 0.0015107166462090453,
"eval_recall_micro": 0.0125,
"eval_recall_weighted": 0.0125,
"eval_runtime": 5.6603,
"eval_samples_per_second": 438.142,
"eval_steps_per_second": 27.384,
"step": 1240
},
{
"epoch": 1.05,
"learning_rate": 2.618951612903226e-05,
"loss": 6.9564,
"step": 1302
},
{
"epoch": 1.1,
"learning_rate": 2.743951612903226e-05,
"loss": 6.8974,
"step": 1364
},
{
"epoch": 1.15,
"learning_rate": 2.8689516129032257e-05,
"loss": 6.9109,
"step": 1426
},
{
"epoch": 1.2,
"learning_rate": 2.9939516129032256e-05,
"loss": 6.9089,
"step": 1488
},
{
"epoch": 1.25,
"learning_rate": 3.1189516129032256e-05,
"loss": 6.8398,
"step": 1550
},
{
"epoch": 1.3,
"learning_rate": 3.241935483870968e-05,
"loss": 6.9353,
"step": 1612
},
{
"epoch": 1.35,
"learning_rate": 3.366935483870968e-05,
"loss": 6.9405,
"step": 1674
},
{
"epoch": 1.4,
"learning_rate": 3.48991935483871e-05,
"loss": 6.9203,
"step": 1736
},
{
"epoch": 1.45,
"learning_rate": 3.61491935483871e-05,
"loss": 6.8844,
"step": 1798
},
{
"epoch": 1.5,
"learning_rate": 3.7399193548387094e-05,
"loss": 6.8705,
"step": 1860
},
{
"epoch": 1.55,
"learning_rate": 3.86491935483871e-05,
"loss": 6.8628,
"step": 1922
},
{
"epoch": 1.6,
"learning_rate": 3.98991935483871e-05,
"loss": 6.9242,
"step": 1984
},
{
"epoch": 1.65,
"learning_rate": 4.11491935483871e-05,
"loss": 6.9142,
"step": 2046
},
{
"epoch": 1.7,
"learning_rate": 4.23991935483871e-05,
"loss": 6.8626,
"step": 2108
},
{
"epoch": 1.75,
"learning_rate": 4.36491935483871e-05,
"loss": 6.9109,
"step": 2170
},
{
"epoch": 1.8,
"learning_rate": 4.48991935483871e-05,
"loss": 6.8685,
"step": 2232
},
{
"epoch": 1.85,
"learning_rate": 4.61491935483871e-05,
"loss": 6.9082,
"step": 2294
},
{
"epoch": 1.9,
"learning_rate": 4.73991935483871e-05,
"loss": 6.8367,
"step": 2356
},
{
"epoch": 1.95,
"learning_rate": 4.86491935483871e-05,
"loss": 6.8052,
"step": 2418
},
{
"epoch": 2.0,
"learning_rate": 4.98991935483871e-05,
"loss": 6.9271,
"step": 2480
},
{
"epoch": 2.0,
"eval_accuracy": 0.01975806451612903,
"eval_f1_macro": 0.0005161676281010565,
"eval_f1_micro": 0.01975806451612903,
"eval_f1_weighted": 0.0032348889447503157,
"eval_loss": 6.7918314933776855,
"eval_precision_macro": 0.0003125183576995364,
"eval_precision_micro": 0.01975806451612903,
"eval_precision_weighted": 0.001930682521200322,
"eval_recall_macro": 0.0029791410946401033,
"eval_recall_micro": 0.01975806451612903,
"eval_recall_weighted": 0.01975806451612903,
"eval_runtime": 5.6711,
"eval_samples_per_second": 437.304,
"eval_steps_per_second": 27.332,
"step": 2480
},
{
"epoch": 2.05,
"learning_rate": 4.987231182795699e-05,
"loss": 6.7229,
"step": 2542
},
{
"epoch": 2.1,
"learning_rate": 4.9733422939068106e-05,
"loss": 6.6922,
"step": 2604
},
{
"epoch": 2.15,
"learning_rate": 4.9594534050179215e-05,
"loss": 6.7156,
"step": 2666
},
{
"epoch": 2.2,
"learning_rate": 4.9455645161290324e-05,
"loss": 6.7197,
"step": 2728
},
{
"epoch": 2.25,
"learning_rate": 4.931675627240144e-05,
"loss": 6.6575,
"step": 2790
},
{
"epoch": 2.3,
"learning_rate": 4.917786738351255e-05,
"loss": 6.7108,
"step": 2852
},
{
"epoch": 2.35,
"learning_rate": 4.903897849462366e-05,
"loss": 6.6203,
"step": 2914
},
{
"epoch": 2.4,
"learning_rate": 4.890008960573477e-05,
"loss": 6.6749,
"step": 2976
},
{
"epoch": 2.45,
"learning_rate": 4.876120071684588e-05,
"loss": 6.6755,
"step": 3038
},
{
"epoch": 2.5,
"learning_rate": 4.862231182795699e-05,
"loss": 6.65,
"step": 3100
},
{
"epoch": 2.55,
"learning_rate": 4.84834229390681e-05,
"loss": 6.7578,
"step": 3162
},
{
"epoch": 2.6,
"learning_rate": 4.834453405017921e-05,
"loss": 6.6982,
"step": 3224
},
{
"epoch": 2.65,
"learning_rate": 4.820564516129033e-05,
"loss": 6.5956,
"step": 3286
},
{
"epoch": 2.7,
"learning_rate": 4.806675627240144e-05,
"loss": 6.668,
"step": 3348
},
{
"epoch": 2.75,
"learning_rate": 4.7927867383512546e-05,
"loss": 6.6552,
"step": 3410
},
{
"epoch": 2.8,
"learning_rate": 4.778897849462366e-05,
"loss": 6.6823,
"step": 3472
},
{
"epoch": 2.85,
"learning_rate": 4.765008960573477e-05,
"loss": 6.6094,
"step": 3534
},
{
"epoch": 2.9,
"learning_rate": 4.751120071684588e-05,
"loss": 6.6157,
"step": 3596
},
{
"epoch": 2.95,
"learning_rate": 4.737231182795699e-05,
"loss": 6.6346,
"step": 3658
},
{
"epoch": 3.0,
"learning_rate": 4.72334229390681e-05,
"loss": 6.6196,
"step": 3720
},
{
"epoch": 3.0,
"eval_accuracy": 0.031048387096774193,
"eval_f1_macro": 0.0024286702802756113,
"eval_f1_micro": 0.031048387096774193,
"eval_f1_weighted": 0.011223430962410517,
"eval_loss": 6.620409965515137,
"eval_precision_macro": 0.0018299977083098978,
"eval_precision_micro": 0.031048387096774193,
"eval_precision_weighted": 0.008385708572774478,
"eval_recall_macro": 0.005879374221598751,
"eval_recall_micro": 0.031048387096774193,
"eval_recall_weighted": 0.031048387096774193,
"eval_runtime": 5.6595,
"eval_samples_per_second": 438.199,
"eval_steps_per_second": 27.387,
"step": 3720
},
{
"epoch": 3.05,
"learning_rate": 4.7094534050179215e-05,
"loss": 6.27,
"step": 3782
},
{
"epoch": 3.1,
"learning_rate": 4.6955645161290324e-05,
"loss": 6.4106,
"step": 3844
},
{
"epoch": 3.15,
"learning_rate": 4.6816756272401434e-05,
"loss": 6.2928,
"step": 3906
},
{
"epoch": 3.2,
"learning_rate": 4.667786738351255e-05,
"loss": 6.4405,
"step": 3968
},
{
"epoch": 3.25,
"learning_rate": 4.653897849462366e-05,
"loss": 6.3277,
"step": 4030
},
{
"epoch": 3.3,
"learning_rate": 4.640008960573477e-05,
"loss": 6.3392,
"step": 4092
},
{
"epoch": 3.35,
"learning_rate": 4.6261200716845884e-05,
"loss": 6.302,
"step": 4154
},
{
"epoch": 3.4,
"learning_rate": 4.6122311827956993e-05,
"loss": 6.3753,
"step": 4216
},
{
"epoch": 3.45,
"learning_rate": 4.59834229390681e-05,
"loss": 6.2649,
"step": 4278
},
{
"epoch": 3.5,
"learning_rate": 4.584453405017922e-05,
"loss": 6.2694,
"step": 4340
},
{
"epoch": 3.55,
"learning_rate": 4.570564516129032e-05,
"loss": 6.3022,
"step": 4402
},
{
"epoch": 3.6,
"learning_rate": 4.556675627240144e-05,
"loss": 6.4359,
"step": 4464
},
{
"epoch": 3.65,
"learning_rate": 4.5427867383512546e-05,
"loss": 6.3682,
"step": 4526
},
{
"epoch": 3.7,
"learning_rate": 4.5288978494623656e-05,
"loss": 6.2065,
"step": 4588
},
{
"epoch": 3.75,
"learning_rate": 4.515008960573477e-05,
"loss": 6.4069,
"step": 4650
},
{
"epoch": 3.8,
"learning_rate": 4.501120071684588e-05,
"loss": 6.2821,
"step": 4712
},
{
"epoch": 3.85,
"learning_rate": 4.487231182795699e-05,
"loss": 6.1709,
"step": 4774
},
{
"epoch": 3.9,
"learning_rate": 4.4733422939068106e-05,
"loss": 6.3291,
"step": 4836
},
{
"epoch": 3.95,
"learning_rate": 4.4594534050179215e-05,
"loss": 6.2744,
"step": 4898
},
{
"epoch": 4.0,
"learning_rate": 4.4455645161290325e-05,
"loss": 6.312,
"step": 4960
},
{
"epoch": 4.0,
"eval_accuracy": 0.04395161290322581,
"eval_f1_macro": 0.006051911225519596,
"eval_f1_micro": 0.04395161290322581,
"eval_f1_weighted": 0.021178625226199167,
"eval_loss": 6.576526641845703,
"eval_precision_macro": 0.004909111507595384,
"eval_precision_micro": 0.04395161290322581,
"eval_precision_weighted": 0.01643333097832909,
"eval_recall_macro": 0.012797544111277469,
"eval_recall_micro": 0.04395161290322581,
"eval_recall_weighted": 0.04395161290322581,
"eval_runtime": 5.6977,
"eval_samples_per_second": 435.262,
"eval_steps_per_second": 27.204,
"step": 4960
}
],
"logging_steps": 62,
"max_steps": 24800,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 2638150224863232.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}