sentiment-seq_bn-rf64-2 / trainer_state.json
apwic's picture
End of training
ebc7702 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 2440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 4.00977087020874,
"learning_rate": 4.75e-05,
"loss": 0.5604,
"step": 122
},
{
"epoch": 1.0,
"eval_accuracy": 0.7167919799498746,
"eval_f1": 0.608283160007298,
"eval_loss": 0.5190858244895935,
"eval_precision": 0.6430937818552498,
"eval_recall": 0.6021094744499,
"eval_runtime": 1.7103,
"eval_samples_per_second": 233.298,
"eval_steps_per_second": 29.235,
"step": 122
},
{
"epoch": 2.0,
"grad_norm": 4.0487446784973145,
"learning_rate": 4.5e-05,
"loss": 0.5023,
"step": 244
},
{
"epoch": 2.0,
"eval_accuracy": 0.7293233082706767,
"eval_f1": 0.6894315036900369,
"eval_loss": 0.5117867588996887,
"eval_precision": 0.6834947426674045,
"eval_recall": 0.7009910892889616,
"eval_runtime": 1.7052,
"eval_samples_per_second": 233.993,
"eval_steps_per_second": 29.322,
"step": 244
},
{
"epoch": 3.0,
"grad_norm": 4.1346282958984375,
"learning_rate": 4.25e-05,
"loss": 0.4676,
"step": 366
},
{
"epoch": 3.0,
"eval_accuracy": 0.7543859649122807,
"eval_f1": 0.7066176470588235,
"eval_loss": 0.46596524119377136,
"eval_precision": 0.704723824246388,
"eval_recall": 0.7087197672304055,
"eval_runtime": 1.7034,
"eval_samples_per_second": 234.238,
"eval_steps_per_second": 29.353,
"step": 366
},
{
"epoch": 4.0,
"grad_norm": 3.7155730724334717,
"learning_rate": 4e-05,
"loss": 0.4415,
"step": 488
},
{
"epoch": 4.0,
"eval_accuracy": 0.7844611528822055,
"eval_f1": 0.7345596880995111,
"eval_loss": 0.4403132498264313,
"eval_precision": 0.740066434672572,
"eval_recall": 0.7299963629750863,
"eval_runtime": 1.7029,
"eval_samples_per_second": 234.302,
"eval_steps_per_second": 29.361,
"step": 488
},
{
"epoch": 5.0,
"grad_norm": 5.956135272979736,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.4208,
"step": 610
},
{
"epoch": 5.0,
"eval_accuracy": 0.8120300751879699,
"eval_f1": 0.7738180718793697,
"eval_loss": 0.42520591616630554,
"eval_precision": 0.7731467519150732,
"eval_recall": 0.7745044553555192,
"eval_runtime": 1.7031,
"eval_samples_per_second": 234.273,
"eval_steps_per_second": 29.358,
"step": 610
},
{
"epoch": 6.0,
"grad_norm": 2.4589931964874268,
"learning_rate": 3.5e-05,
"loss": 0.383,
"step": 732
},
{
"epoch": 6.0,
"eval_accuracy": 0.8170426065162907,
"eval_f1": 0.7869291373142845,
"eval_loss": 0.41415733098983765,
"eval_precision": 0.7790262172284643,
"eval_recall": 0.7980541916712129,
"eval_runtime": 1.7034,
"eval_samples_per_second": 234.238,
"eval_steps_per_second": 29.353,
"step": 732
},
{
"epoch": 7.0,
"grad_norm": 4.000768184661865,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.3751,
"step": 854
},
{
"epoch": 7.0,
"eval_accuracy": 0.8270676691729323,
"eval_f1": 0.7790538100397255,
"eval_loss": 0.3982468843460083,
"eval_precision": 0.8007425742574257,
"eval_recall": 0.765139116202946,
"eval_runtime": 1.7033,
"eval_samples_per_second": 234.25,
"eval_steps_per_second": 29.355,
"step": 854
},
{
"epoch": 8.0,
"grad_norm": 4.909487724304199,
"learning_rate": 3e-05,
"loss": 0.3554,
"step": 976
},
{
"epoch": 8.0,
"eval_accuracy": 0.8395989974937343,
"eval_f1": 0.7944490952411617,
"eval_loss": 0.38216620683670044,
"eval_precision": 0.81875,
"eval_recall": 0.7790052736861247,
"eval_runtime": 1.7031,
"eval_samples_per_second": 234.272,
"eval_steps_per_second": 29.357,
"step": 976
},
{
"epoch": 9.0,
"grad_norm": 8.974835395812988,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.3502,
"step": 1098
},
{
"epoch": 9.0,
"eval_accuracy": 0.8521303258145363,
"eval_f1": 0.8237962290701417,
"eval_loss": 0.3767492175102234,
"eval_precision": 0.8201159969225307,
"eval_recall": 0.8278777959629023,
"eval_runtime": 1.7032,
"eval_samples_per_second": 234.262,
"eval_steps_per_second": 29.356,
"step": 1098
},
{
"epoch": 10.0,
"grad_norm": 5.985044956207275,
"learning_rate": 2.5e-05,
"loss": 0.3326,
"step": 1220
},
{
"epoch": 10.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.8235951134380454,
"eval_loss": 0.3652937412261963,
"eval_precision": 0.8321878579610538,
"eval_recall": 0.8164211674849973,
"eval_runtime": 1.7027,
"eval_samples_per_second": 234.329,
"eval_steps_per_second": 29.365,
"step": 1220
},
{
"epoch": 11.0,
"grad_norm": 10.441373825073242,
"learning_rate": 2.25e-05,
"loss": 0.3246,
"step": 1342
},
{
"epoch": 11.0,
"eval_accuracy": 0.8571428571428571,
"eval_f1": 0.822632092025736,
"eval_loss": 0.3636661469936371,
"eval_precision": 0.8334889561465646,
"eval_recall": 0.813920712856883,
"eval_runtime": 1.7033,
"eval_samples_per_second": 234.245,
"eval_steps_per_second": 29.354,
"step": 1342
},
{
"epoch": 12.0,
"grad_norm": 11.4411039352417,
"learning_rate": 2e-05,
"loss": 0.3255,
"step": 1464
},
{
"epoch": 12.0,
"eval_accuracy": 0.8546365914786967,
"eval_f1": 0.8209821152299028,
"eval_loss": 0.3571181893348694,
"eval_precision": 0.8284245491932933,
"eval_recall": 0.8146481178396072,
"eval_runtime": 1.7023,
"eval_samples_per_second": 234.39,
"eval_steps_per_second": 29.372,
"step": 1464
},
{
"epoch": 13.0,
"grad_norm": 2.8683927059173584,
"learning_rate": 1.75e-05,
"loss": 0.3096,
"step": 1586
},
{
"epoch": 13.0,
"eval_accuracy": 0.8471177944862155,
"eval_f1": 0.8022843031331996,
"eval_loss": 0.35997274518013,
"eval_precision": 0.832141339753576,
"eval_recall": 0.784324422622295,
"eval_runtime": 1.704,
"eval_samples_per_second": 234.154,
"eval_steps_per_second": 29.343,
"step": 1586
},
{
"epoch": 14.0,
"grad_norm": 7.7256760597229,
"learning_rate": 1.5e-05,
"loss": 0.3123,
"step": 1708
},
{
"epoch": 14.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8271551457392166,
"eval_loss": 0.34547460079193115,
"eval_precision": 0.8347358430876305,
"eval_recall": 0.8206946717585015,
"eval_runtime": 1.7031,
"eval_samples_per_second": 234.279,
"eval_steps_per_second": 29.358,
"step": 1708
},
{
"epoch": 15.0,
"grad_norm": 3.0547094345092773,
"learning_rate": 1.25e-05,
"loss": 0.2937,
"step": 1830
},
{
"epoch": 15.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8249232119350592,
"eval_loss": 0.34601926803588867,
"eval_precision": 0.8467014712861889,
"eval_recall": 0.8099654482633206,
"eval_runtime": 1.7039,
"eval_samples_per_second": 234.176,
"eval_steps_per_second": 29.345,
"step": 1830
},
{
"epoch": 16.0,
"grad_norm": 4.09341287612915,
"learning_rate": 1e-05,
"loss": 0.2941,
"step": 1952
},
{
"epoch": 16.0,
"eval_accuracy": 0.8596491228070176,
"eval_f1": 0.8280701754385965,
"eval_loss": 0.34146586060523987,
"eval_precision": 0.833567942942943,
"eval_recall": 0.8231951263866157,
"eval_runtime": 1.703,
"eval_samples_per_second": 234.292,
"eval_steps_per_second": 29.36,
"step": 1952
},
{
"epoch": 17.0,
"grad_norm": 2.538574457168579,
"learning_rate": 7.5e-06,
"loss": 0.3031,
"step": 2074
},
{
"epoch": 17.0,
"eval_accuracy": 0.8646616541353384,
"eval_f1": 0.8333281762485303,
"eval_loss": 0.3417251408100128,
"eval_precision": 0.8410471369819678,
"eval_recall": 0.8267412256773959,
"eval_runtime": 1.7034,
"eval_samples_per_second": 234.243,
"eval_steps_per_second": 29.354,
"step": 2074
},
{
"epoch": 18.0,
"grad_norm": 6.733055114746094,
"learning_rate": 5e-06,
"loss": 0.3003,
"step": 2196
},
{
"epoch": 18.0,
"eval_accuracy": 0.8621553884711779,
"eval_f1": 0.8315822595375324,
"eval_loss": 0.3399008512496948,
"eval_precision": 0.8361280487804879,
"eval_recall": 0.82746863066012,
"eval_runtime": 1.703,
"eval_samples_per_second": 234.288,
"eval_steps_per_second": 29.359,
"step": 2196
},
{
"epoch": 19.0,
"grad_norm": 2.863952398300171,
"learning_rate": 2.5e-06,
"loss": 0.2976,
"step": 2318
},
{
"epoch": 19.0,
"eval_accuracy": 0.8671679197994987,
"eval_f1": 0.8332268672959993,
"eval_loss": 0.34020429849624634,
"eval_precision": 0.849623687858982,
"eval_recall": 0.8210129114384433,
"eval_runtime": 1.7043,
"eval_samples_per_second": 234.111,
"eval_steps_per_second": 29.337,
"step": 2318
},
{
"epoch": 20.0,
"grad_norm": 4.950519561767578,
"learning_rate": 0.0,
"loss": 0.2956,
"step": 2440
},
{
"epoch": 20.0,
"eval_accuracy": 0.8696741854636592,
"eval_f1": 0.8368354828562441,
"eval_loss": 0.3397713303565979,
"eval_precision": 0.8520237470480189,
"eval_recall": 0.8252864157119476,
"eval_runtime": 1.7047,
"eval_samples_per_second": 234.055,
"eval_steps_per_second": 29.33,
"step": 2440
},
{
"epoch": 20.0,
"step": 2440,
"total_flos": 7604291693904000.0,
"train_loss": 0.36226015403622486,
"train_runtime": 612.3732,
"train_samples_per_second": 118.816,
"train_steps_per_second": 3.984
}
],
"logging_steps": 500,
"max_steps": 2440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 7604291693904000.0,
"train_batch_size": 30,
"trial_name": null,
"trial_params": null
}