oss_sentiment / trainer_state.json
Dang-gu's picture
upload trained KoBERT sentiment classifier
ee86480 verified
{
"best_global_step": 488,
"best_metric": 0.7802051305770874,
"best_model_checkpoint": "/content/drive/MyDrive/Colab_Notebooks/OSS_AI/Model/Sentiment/checkpoint-488",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 1830,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.819672131147541,
"grad_norm": 4.7928876876831055,
"learning_rate": 1.8918032786885248e-05,
"loss": 0.8341,
"step": 100
},
{
"epoch": 1.0,
"eval_accuracy": 0.6639175257731958,
"eval_f1": 0.5896451575310832,
"eval_loss": 0.813234806060791,
"eval_precision": 0.5565457468219701,
"eval_recall": 0.6639175257731958,
"eval_runtime": 6.0202,
"eval_samples_per_second": 161.123,
"eval_steps_per_second": 5.149,
"step": 122
},
{
"epoch": 1.639344262295082,
"grad_norm": 2.870753765106201,
"learning_rate": 1.782513661202186e-05,
"loss": 0.8325,
"step": 200
},
{
"epoch": 2.0,
"eval_accuracy": 0.654639175257732,
"eval_f1": 0.5932971966767757,
"eval_loss": 0.8132595419883728,
"eval_precision": 0.551828540423223,
"eval_recall": 0.654639175257732,
"eval_runtime": 6.0123,
"eval_samples_per_second": 161.335,
"eval_steps_per_second": 5.156,
"step": 244
},
{
"epoch": 2.459016393442623,
"grad_norm": 3.2532968521118164,
"learning_rate": 1.6732240437158473e-05,
"loss": 0.7854,
"step": 300
},
{
"epoch": 3.0,
"eval_accuracy": 0.6505154639175258,
"eval_f1": 0.5982517804485575,
"eval_loss": 0.83302241563797,
"eval_precision": 0.5562325921775969,
"eval_recall": 0.6505154639175258,
"eval_runtime": 5.9999,
"eval_samples_per_second": 161.67,
"eval_steps_per_second": 5.167,
"step": 366
},
{
"epoch": 3.278688524590164,
"grad_norm": 4.2627668380737305,
"learning_rate": 1.5639344262295084e-05,
"loss": 0.764,
"step": 400
},
{
"epoch": 4.0,
"eval_accuracy": 0.668041237113402,
"eval_f1": 0.5974166244530936,
"eval_loss": 0.7802051305770874,
"eval_precision": 0.5622140014030379,
"eval_recall": 0.668041237113402,
"eval_runtime": 5.9839,
"eval_samples_per_second": 162.102,
"eval_steps_per_second": 5.181,
"step": 488
},
{
"epoch": 4.098360655737705,
"grad_norm": 4.526758193969727,
"learning_rate": 1.4546448087431694e-05,
"loss": 0.7476,
"step": 500
},
{
"epoch": 4.918032786885246,
"grad_norm": 11.85865306854248,
"learning_rate": 1.3453551912568309e-05,
"loss": 0.7092,
"step": 600
},
{
"epoch": 5.0,
"eval_accuracy": 0.6556701030927835,
"eval_f1": 0.6157794236906242,
"eval_loss": 0.8059830665588379,
"eval_precision": 0.6187547339732196,
"eval_recall": 0.6556701030927835,
"eval_runtime": 5.9817,
"eval_samples_per_second": 162.162,
"eval_steps_per_second": 5.182,
"step": 610
},
{
"epoch": 5.737704918032787,
"grad_norm": 5.507380485534668,
"learning_rate": 1.236065573770492e-05,
"loss": 0.6902,
"step": 700
},
{
"epoch": 6.0,
"eval_accuracy": 0.6618556701030928,
"eval_f1": 0.6286838209004242,
"eval_loss": 0.8481519818305969,
"eval_precision": 0.6257801850028126,
"eval_recall": 0.6618556701030928,
"eval_runtime": 6.0029,
"eval_samples_per_second": 161.589,
"eval_steps_per_second": 5.164,
"step": 732
},
{
"epoch": 6.557377049180328,
"grad_norm": 6.127316474914551,
"learning_rate": 1.1267759562841532e-05,
"loss": 0.6641,
"step": 800
},
{
"epoch": 7.0,
"eval_accuracy": 0.6597938144329897,
"eval_f1": 0.6141479777735285,
"eval_loss": 0.8420491218566895,
"eval_precision": 0.6198812937813939,
"eval_recall": 0.6597938144329897,
"eval_runtime": 5.9773,
"eval_samples_per_second": 162.282,
"eval_steps_per_second": 5.186,
"step": 854
},
{
"epoch": 7.377049180327869,
"grad_norm": 5.021772861480713,
"learning_rate": 1.0174863387978143e-05,
"loss": 0.6252,
"step": 900
},
{
"epoch": 8.0,
"eval_accuracy": 0.6525773195876289,
"eval_f1": 0.6273821893464856,
"eval_loss": 0.8931316137313843,
"eval_precision": 0.622269948824605,
"eval_recall": 0.6525773195876289,
"eval_runtime": 6.0039,
"eval_samples_per_second": 161.561,
"eval_steps_per_second": 5.163,
"step": 976
},
{
"epoch": 8.19672131147541,
"grad_norm": 8.988058090209961,
"learning_rate": 9.081967213114755e-06,
"loss": 0.5966,
"step": 1000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6288659793814433,
"eval_f1": 0.6245620136343327,
"eval_loss": 0.9026955962181091,
"eval_precision": 0.6209326156055649,
"eval_recall": 0.6288659793814433,
"eval_runtime": 6.0136,
"eval_samples_per_second": 161.301,
"eval_steps_per_second": 5.155,
"step": 1098
},
{
"epoch": 9.01639344262295,
"grad_norm": 3.686394453048706,
"learning_rate": 7.989071038251368e-06,
"loss": 0.5616,
"step": 1100
},
{
"epoch": 9.836065573770492,
"grad_norm": 7.0566725730896,
"learning_rate": 6.8961748633879785e-06,
"loss": 0.5189,
"step": 1200
},
{
"epoch": 10.0,
"eval_accuracy": 0.6587628865979381,
"eval_f1": 0.6377643226337417,
"eval_loss": 0.9417644143104553,
"eval_precision": 0.632075158238594,
"eval_recall": 0.6587628865979381,
"eval_runtime": 6.0444,
"eval_samples_per_second": 160.48,
"eval_steps_per_second": 5.129,
"step": 1220
},
{
"epoch": 10.655737704918034,
"grad_norm": 8.161457061767578,
"learning_rate": 5.803278688524591e-06,
"loss": 0.4474,
"step": 1300
},
{
"epoch": 11.0,
"eval_accuracy": 0.6175257731958763,
"eval_f1": 0.6167034673954828,
"eval_loss": 1.0151021480560303,
"eval_precision": 0.6162631882629445,
"eval_recall": 0.6175257731958763,
"eval_runtime": 6.0214,
"eval_samples_per_second": 161.093,
"eval_steps_per_second": 5.148,
"step": 1342
},
{
"epoch": 11.475409836065573,
"grad_norm": 5.476888179779053,
"learning_rate": 4.710382513661203e-06,
"loss": 0.4392,
"step": 1400
},
{
"epoch": 12.0,
"eval_accuracy": 0.6319587628865979,
"eval_f1": 0.6274605445869759,
"eval_loss": 1.0041959285736084,
"eval_precision": 0.6246406564180768,
"eval_recall": 0.6319587628865979,
"eval_runtime": 6.0632,
"eval_samples_per_second": 159.981,
"eval_steps_per_second": 5.113,
"step": 1464
},
{
"epoch": 12.295081967213115,
"grad_norm": 8.54333209991455,
"learning_rate": 3.6174863387978143e-06,
"loss": 0.4093,
"step": 1500
},
{
"epoch": 13.0,
"eval_accuracy": 0.6453608247422681,
"eval_f1": 0.6330366610202867,
"eval_loss": 1.0449495315551758,
"eval_precision": 0.626243455331529,
"eval_recall": 0.6453608247422681,
"eval_runtime": 6.0138,
"eval_samples_per_second": 161.296,
"eval_steps_per_second": 5.155,
"step": 1586
},
{
"epoch": 13.114754098360656,
"grad_norm": 5.127542972564697,
"learning_rate": 2.5245901639344268e-06,
"loss": 0.393,
"step": 1600
},
{
"epoch": 13.934426229508198,
"grad_norm": 9.779213905334473,
"learning_rate": 1.4316939890710382e-06,
"loss": 0.3686,
"step": 1700
},
{
"epoch": 14.0,
"eval_accuracy": 0.6237113402061856,
"eval_f1": 0.62069751108423,
"eval_loss": 1.0731914043426514,
"eval_precision": 0.6184136319581498,
"eval_recall": 0.6237113402061856,
"eval_runtime": 6.0154,
"eval_samples_per_second": 161.254,
"eval_steps_per_second": 5.153,
"step": 1708
},
{
"epoch": 14.754098360655737,
"grad_norm": 8.342227935791016,
"learning_rate": 3.387978142076503e-07,
"loss": 0.3683,
"step": 1800
},
{
"epoch": 15.0,
"eval_accuracy": 0.6278350515463917,
"eval_f1": 0.6219579485144054,
"eval_loss": 1.075632095336914,
"eval_precision": 0.6176398502758935,
"eval_recall": 0.6278350515463917,
"eval_runtime": 6.0468,
"eval_samples_per_second": 160.415,
"eval_steps_per_second": 5.127,
"step": 1830
}
],
"logging_steps": 100,
"max_steps": 1830,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3824353526676480.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}