xlm-roberta-large-lemma-cz / trainer_state.json
oliat's picture
Uploaded model files
b469860 verified
{
"best_metric": 0.9915220559014439,
"best_model_checkpoint": ".//debugged_cz_cac_ses_udpipe_16_0.01_0.00002_20_04-23-24_04-34/checkpoint-20995",
"epoch": 19.999595305544315,
"global_step": 24700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.6616,
"step": 1235
},
{
"epoch": 1.0,
"eval_accuracy": 0.9560764270169905,
"eval_f1": 0.9400045099417687,
"eval_loss": 0.24223673343658447,
"eval_precision": 0.9412405365918448,
"eval_recall": 0.9387717253073337,
"eval_runtime": 27.0992,
"eval_samples_per_second": 91.221,
"eval_steps_per_second": 11.403,
"step": 1235
},
{
"epoch": 2.0,
"learning_rate": 1.914893617021277e-05,
"loss": 0.1798,
"step": 2470
},
{
"epoch": 2.0,
"eval_accuracy": 0.9818715435142852,
"eval_f1": 0.9734886124249771,
"eval_loss": 0.09523347020149231,
"eval_precision": 0.9736305091034373,
"eval_recall": 0.9733467571004663,
"eval_runtime": 22.0988,
"eval_samples_per_second": 111.861,
"eval_steps_per_second": 13.983,
"step": 2470
},
{
"epoch": 3.0,
"learning_rate": 1.8085106382978724e-05,
"loss": 0.0815,
"step": 3705
},
{
"epoch": 3.0,
"eval_accuracy": 0.987861121648332,
"eval_f1": 0.9822315856421673,
"eval_loss": 0.06692199409008026,
"eval_precision": 0.9824528852015798,
"eval_recall": 0.9820103857566765,
"eval_runtime": 22.0277,
"eval_samples_per_second": 112.223,
"eval_steps_per_second": 14.028,
"step": 3705
},
{
"epoch": 4.0,
"learning_rate": 1.7021276595744682e-05,
"loss": 0.0475,
"step": 4940
},
{
"epoch": 4.0,
"eval_accuracy": 0.9899974045161419,
"eval_f1": 0.9855729104566591,
"eval_loss": 0.05793284624814987,
"eval_precision": 0.9856381992103659,
"eval_recall": 0.985507630351844,
"eval_runtime": 21.9964,
"eval_samples_per_second": 112.382,
"eval_steps_per_second": 14.048,
"step": 4940
},
{
"epoch": 5.0,
"learning_rate": 1.595744680851064e-05,
"loss": 0.0339,
"step": 6175
},
{
"epoch": 5.0,
"eval_accuracy": 0.9916145906123346,
"eval_f1": 0.9877177873468036,
"eval_loss": 0.051405712962150574,
"eval_precision": 0.9878879436007527,
"eval_recall": 0.987547689699025,
"eval_runtime": 22.0233,
"eval_samples_per_second": 112.245,
"eval_steps_per_second": 14.031,
"step": 6175
},
{
"epoch": 6.0,
"learning_rate": 1.4893617021276596e-05,
"loss": 0.0235,
"step": 7410
},
{
"epoch": 6.0,
"eval_accuracy": 0.9920737916026114,
"eval_f1": 0.9885550786838342,
"eval_loss": 0.04887189716100693,
"eval_precision": 0.9885027021299142,
"eval_recall": 0.9886074607884697,
"eval_runtime": 22.0452,
"eval_samples_per_second": 112.133,
"eval_steps_per_second": 14.017,
"step": 7410
},
{
"epoch": 7.0,
"learning_rate": 1.3829787234042556e-05,
"loss": 0.0172,
"step": 8645
},
{
"epoch": 7.0,
"eval_accuracy": 0.9921336873839519,
"eval_f1": 0.9884765159341969,
"eval_loss": 0.052920494228601456,
"eval_precision": 0.9883456057636277,
"eval_recall": 0.9886074607884697,
"eval_runtime": 22.0116,
"eval_samples_per_second": 112.304,
"eval_steps_per_second": 14.038,
"step": 8645
},
{
"epoch": 8.0,
"learning_rate": 1.2765957446808513e-05,
"loss": 0.0135,
"step": 9880
},
{
"epoch": 8.0,
"eval_accuracy": 0.9927725757182503,
"eval_f1": 0.9894034121013034,
"eval_loss": 0.047851815819740295,
"eval_precision": 0.9892985802076711,
"eval_recall": 0.9895082662144977,
"eval_runtime": 22.0657,
"eval_samples_per_second": 112.029,
"eval_steps_per_second": 14.004,
"step": 9880
},
{
"epoch": 9.0,
"learning_rate": 1.170212765957447e-05,
"loss": 0.0104,
"step": 11115
},
{
"epoch": 9.0,
"eval_accuracy": 0.992572923113782,
"eval_f1": 0.9889149350391355,
"eval_loss": 0.051443468779325485,
"eval_precision": 0.9886661547016922,
"eval_recall": 0.9891638406104282,
"eval_runtime": 21.9418,
"eval_samples_per_second": 112.662,
"eval_steps_per_second": 14.083,
"step": 11115
},
{
"epoch": 10.0,
"learning_rate": 1.0638297872340426e-05,
"loss": 0.0077,
"step": 12350
},
{
"epoch": 10.0,
"eval_accuracy": 0.9928724020204843,
"eval_f1": 0.9894731266799963,
"eval_loss": 0.04969193413853645,
"eval_precision": 0.9890409508431056,
"eval_recall": 0.9899056803730394,
"eval_runtime": 22.1653,
"eval_samples_per_second": 111.526,
"eval_steps_per_second": 13.941,
"step": 12350
},
{
"epoch": 11.0,
"learning_rate": 9.574468085106385e-06,
"loss": 0.006,
"step": 13585
},
{
"epoch": 11.0,
"eval_accuracy": 0.992912332541378,
"eval_f1": 0.9894125909337856,
"eval_loss": 0.05363365635275841,
"eval_precision": 0.9896879887601728,
"eval_recall": 0.9891373463331921,
"eval_runtime": 22.4359,
"eval_samples_per_second": 110.181,
"eval_steps_per_second": 13.773,
"step": 13585
},
{
"epoch": 12.0,
"learning_rate": 8.510638297872341e-06,
"loss": 0.005,
"step": 14820
},
{
"epoch": 12.0,
"eval_accuracy": 0.9936909776988041,
"eval_f1": 0.9908487729939477,
"eval_loss": 0.05106116086244583,
"eval_precision": 0.9905732821395472,
"eval_recall": 0.9911244171259008,
"eval_runtime": 22.3987,
"eval_samples_per_second": 110.363,
"eval_steps_per_second": 13.795,
"step": 14820
},
{
"epoch": 13.0,
"learning_rate": 7.446808510638298e-06,
"loss": 0.003,
"step": 16055
},
{
"epoch": 13.0,
"eval_accuracy": 0.9939704913450595,
"eval_f1": 0.9910981586965161,
"eval_loss": 0.050564348697662354,
"eval_precision": 0.9910719016584538,
"eval_recall": 0.9911244171259008,
"eval_runtime": 22.3763,
"eval_samples_per_second": 110.474,
"eval_steps_per_second": 13.809,
"step": 16055
},
{
"epoch": 14.0,
"learning_rate": 6.382978723404256e-06,
"loss": 0.0026,
"step": 17290
},
{
"epoch": 14.0,
"eval_accuracy": 0.9940902829077405,
"eval_f1": 0.9915086966312971,
"eval_loss": 0.05338473618030548,
"eval_precision": 0.9914955623261359,
"eval_recall": 0.9915218312844426,
"eval_runtime": 22.5218,
"eval_samples_per_second": 109.76,
"eval_steps_per_second": 13.72,
"step": 17290
},
{
"epoch": 15.0,
"learning_rate": 5.319148936170213e-06,
"loss": 0.0019,
"step": 18525
},
{
"epoch": 15.0,
"eval_accuracy": 0.9939105955637191,
"eval_f1": 0.9911127004940332,
"eval_loss": 0.05444112420082092,
"eval_precision": 0.9909420769658607,
"eval_recall": 0.9912833827893175,
"eval_runtime": 22.3561,
"eval_samples_per_second": 110.574,
"eval_steps_per_second": 13.822,
"step": 18525
},
{
"epoch": 16.0,
"learning_rate": 4.255319148936171e-06,
"loss": 0.0021,
"step": 19760
},
{
"epoch": 16.0,
"eval_accuracy": 0.9932118114480804,
"eval_f1": 0.9898781134075251,
"eval_loss": 0.05772731453180313,
"eval_precision": 0.9899830400678398,
"eval_recall": 0.9897732089868588,
"eval_runtime": 22.3439,
"eval_samples_per_second": 110.634,
"eval_steps_per_second": 13.829,
"step": 19760
},
{
"epoch": 17.0,
"learning_rate": 3.191489361702128e-06,
"loss": 0.002,
"step": 20995
},
{
"epoch": 17.0,
"eval_accuracy": 0.9941302134286342,
"eval_f1": 0.9915220559014439,
"eval_loss": 0.054844070225954056,
"eval_precision": 0.9914957876331267,
"eval_recall": 0.9915483255616787,
"eval_runtime": 22.2286,
"eval_samples_per_second": 111.208,
"eval_steps_per_second": 13.901,
"step": 20995
},
{
"epoch": 18.0,
"learning_rate": 2.1276595744680853e-06,
"loss": 0.0019,
"step": 22230
},
{
"epoch": 18.0,
"eval_accuracy": 0.9939704913450595,
"eval_f1": 0.9911230059886585,
"eval_loss": 0.05662142112851143,
"eval_precision": 0.9912806106222835,
"eval_recall": 0.990965451462484,
"eval_runtime": 22.2877,
"eval_samples_per_second": 110.913,
"eval_steps_per_second": 13.864,
"step": 22230
},
{
"epoch": 19.0,
"learning_rate": 1.0638297872340427e-06,
"loss": 0.0026,
"step": 23465
},
{
"epoch": 19.0,
"eval_accuracy": 0.9933316030107613,
"eval_f1": 0.9899420908259677,
"eval_loss": 0.05778687819838524,
"eval_precision": 0.9902701556244863,
"eval_recall": 0.9896142433234422,
"eval_runtime": 22.4497,
"eval_samples_per_second": 110.113,
"eval_steps_per_second": 13.764,
"step": 23465
},
{
"epoch": 20.0,
"learning_rate": 0.0,
"loss": 0.0025,
"step": 24700
},
{
"epoch": 20.0,
"eval_accuracy": 0.9933914987921018,
"eval_f1": 0.9900625397498409,
"eval_loss": 0.057573601603507996,
"eval_precision": 0.9902724766751484,
"eval_recall": 0.9898526918185672,
"eval_runtime": 22.5179,
"eval_samples_per_second": 109.779,
"eval_steps_per_second": 13.722,
"step": 24700
},
{
"epoch": 20.0,
"step": 24700,
"total_flos": 5.441699015795328e+16,
"train_loss": 0.10530064259463476,
"train_runtime": 11788.9391,
"train_samples_per_second": 33.537,
"train_steps_per_second": 2.095
}
],
"max_steps": 24700,
"num_train_epochs": 20,
"total_flos": 5.441699015795328e+16,
"trial_name": null,
"trial_params": null
}