emotion-textcnn / trainer_state.json
AnnyNguyen's picture
Upload trainer_state.json with huggingface_hub
c2b75b0 verified
{
"best_global_step": 3654,
"best_metric": 0.25175856147050574,
"best_model_checkpoint": "outputs/textcnn/checkpoint-3654",
"epoch": 42.0,
"eval_steps": 500,
"global_step": 3654,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.989197731018066,
"learning_rate": 3.44e-06,
"loss": 2.0625,
"step": 87
},
{
"epoch": 1.0,
"eval_accuracy": 0.31486880466472306,
"eval_loss": 1.9201620817184448,
"eval_macro_f1": 0.08069587306875443,
"eval_runtime": 0.0822,
"eval_samples_per_second": 8343.34,
"eval_steps_per_second": 133.785,
"step": 87
},
{
"epoch": 2.0,
"grad_norm": 4.812881946563721,
"learning_rate": 6.92e-06,
"loss": 2.0517,
"step": 174
},
{
"epoch": 2.0,
"eval_accuracy": 0.3163265306122449,
"eval_loss": 1.867976188659668,
"eval_macro_f1": 0.08271289631865439,
"eval_runtime": 0.0798,
"eval_samples_per_second": 8598.327,
"eval_steps_per_second": 137.874,
"step": 174
},
{
"epoch": 3.0,
"grad_norm": 4.3668341636657715,
"learning_rate": 1.04e-05,
"loss": 1.9731,
"step": 261
},
{
"epoch": 3.0,
"eval_accuracy": 0.31924198250728864,
"eval_loss": 1.8050793409347534,
"eval_macro_f1": 0.09255094257382587,
"eval_runtime": 0.0765,
"eval_samples_per_second": 8966.769,
"eval_steps_per_second": 143.782,
"step": 261
},
{
"epoch": 4.0,
"grad_norm": 4.521817207336426,
"learning_rate": 1.3880000000000001e-05,
"loss": 1.9355,
"step": 348
},
{
"epoch": 4.0,
"eval_accuracy": 0.3206997084548105,
"eval_loss": 1.760697841644287,
"eval_macro_f1": 0.09828636171767466,
"eval_runtime": 0.0776,
"eval_samples_per_second": 8839.5,
"eval_steps_per_second": 141.741,
"step": 348
},
{
"epoch": 5.0,
"grad_norm": 4.453028678894043,
"learning_rate": 1.736e-05,
"loss": 1.9005,
"step": 435
},
{
"epoch": 5.0,
"eval_accuracy": 0.32653061224489793,
"eval_loss": 1.7365907430648804,
"eval_macro_f1": 0.11368274326806725,
"eval_runtime": 0.0784,
"eval_samples_per_second": 8750.81,
"eval_steps_per_second": 140.319,
"step": 435
},
{
"epoch": 6.0,
"grad_norm": 4.236560344696045,
"learning_rate": 1.999967634800249e-05,
"loss": 1.8734,
"step": 522
},
{
"epoch": 6.0,
"eval_accuracy": 0.33527696793002915,
"eval_loss": 1.7214981317520142,
"eval_macro_f1": 0.12326578903926995,
"eval_runtime": 0.0768,
"eval_samples_per_second": 8932.057,
"eval_steps_per_second": 143.225,
"step": 522
},
{
"epoch": 7.0,
"grad_norm": 4.54931640625,
"learning_rate": 1.999144090999249e-05,
"loss": 1.8459,
"step": 609
},
{
"epoch": 7.0,
"eval_accuracy": 0.33819241982507287,
"eval_loss": 1.7085658311843872,
"eval_macro_f1": 0.12473908000560015,
"eval_runtime": 0.0772,
"eval_samples_per_second": 8891.235,
"eval_steps_per_second": 142.571,
"step": 609
},
{
"epoch": 8.0,
"grad_norm": 4.285991191864014,
"learning_rate": 1.9972106098590665e-05,
"loss": 1.8271,
"step": 696
},
{
"epoch": 8.0,
"eval_accuracy": 0.3469387755102041,
"eval_loss": 1.6989842653274536,
"eval_macro_f1": 0.1374280909101405,
"eval_runtime": 0.0776,
"eval_samples_per_second": 8844.853,
"eval_steps_per_second": 141.827,
"step": 696
},
{
"epoch": 9.0,
"grad_norm": 5.04291296005249,
"learning_rate": 1.994169339261005e-05,
"loss": 1.8219,
"step": 783
},
{
"epoch": 9.0,
"eval_accuracy": 0.3498542274052478,
"eval_loss": 1.6909065246582031,
"eval_macro_f1": 0.14476905523124012,
"eval_runtime": 0.0766,
"eval_samples_per_second": 8950.395,
"eval_steps_per_second": 143.519,
"step": 783
},
{
"epoch": 10.0,
"grad_norm": 3.8076608180999756,
"learning_rate": 1.990023657716558e-05,
"loss": 1.8039,
"step": 870
},
{
"epoch": 10.0,
"eval_accuracy": 0.3469387755102041,
"eval_loss": 1.6828982830047607,
"eval_macro_f1": 0.14793175460560187,
"eval_runtime": 0.0764,
"eval_samples_per_second": 8983.678,
"eval_steps_per_second": 144.053,
"step": 870
},
{
"epoch": 11.0,
"grad_norm": 3.9515891075134277,
"learning_rate": 1.9847781706142608e-05,
"loss": 1.7898,
"step": 957
},
{
"epoch": 11.0,
"eval_accuracy": 0.35131195335276966,
"eval_loss": 1.675271987915039,
"eval_macro_f1": 0.15154267292502702,
"eval_runtime": 0.0776,
"eval_samples_per_second": 8837.816,
"eval_steps_per_second": 141.714,
"step": 957
},
{
"epoch": 12.0,
"grad_norm": 3.946139097213745,
"learning_rate": 1.978438705103621e-05,
"loss": 1.7634,
"step": 1044
},
{
"epoch": 12.0,
"eval_accuracy": 0.36151603498542273,
"eval_loss": 1.6689125299453735,
"eval_macro_f1": 0.16042602782078802,
"eval_runtime": 0.0767,
"eval_samples_per_second": 8948.781,
"eval_steps_per_second": 143.494,
"step": 1044
},
{
"epoch": 13.0,
"grad_norm": 3.8360438346862793,
"learning_rate": 1.9710123036218044e-05,
"loss": 1.7572,
"step": 1131
},
{
"epoch": 13.0,
"eval_accuracy": 0.37026239067055394,
"eval_loss": 1.6614633798599243,
"eval_macro_f1": 0.17028534014340227,
"eval_runtime": 0.0767,
"eval_samples_per_second": 8943.635,
"eval_steps_per_second": 143.411,
"step": 1131
},
{
"epoch": 14.0,
"grad_norm": 3.794384479522705,
"learning_rate": 1.962507216070276e-05,
"loss": 1.7411,
"step": 1218
},
{
"epoch": 14.0,
"eval_accuracy": 0.36151603498542273,
"eval_loss": 1.6555291414260864,
"eval_macro_f1": 0.17229172694357175,
"eval_runtime": 0.0764,
"eval_samples_per_second": 8976.952,
"eval_steps_per_second": 143.945,
"step": 1218
},
{
"epoch": 15.0,
"grad_norm": 3.697802782058716,
"learning_rate": 1.9529328906500833e-05,
"loss": 1.7355,
"step": 1305
},
{
"epoch": 15.0,
"eval_accuracy": 0.36443148688046645,
"eval_loss": 1.6497727632522583,
"eval_macro_f1": 0.17077990977186067,
"eval_runtime": 0.0762,
"eval_samples_per_second": 8997.528,
"eval_steps_per_second": 144.275,
"step": 1305
},
{
"epoch": 16.0,
"grad_norm": 4.296336650848389,
"learning_rate": 1.9422999633659592e-05,
"loss": 1.7163,
"step": 1392
},
{
"epoch": 16.0,
"eval_accuracy": 0.3717201166180758,
"eval_loss": 1.6435818672180176,
"eval_macro_f1": 0.1808240545174343,
"eval_runtime": 0.0787,
"eval_samples_per_second": 8721.685,
"eval_steps_per_second": 139.852,
"step": 1392
},
{
"epoch": 17.0,
"grad_norm": 4.240530490875244,
"learning_rate": 1.9306202462109128e-05,
"loss": 1.6979,
"step": 1479
},
{
"epoch": 17.0,
"eval_accuracy": 0.3760932944606414,
"eval_loss": 1.6384371519088745,
"eval_macro_f1": 0.18768397854098065,
"eval_runtime": 0.0781,
"eval_samples_per_second": 8785.572,
"eval_steps_per_second": 140.877,
"step": 1479
},
{
"epoch": 18.0,
"grad_norm": 4.70124626159668,
"learning_rate": 1.9179067140444246e-05,
"loss": 1.7027,
"step": 1566
},
{
"epoch": 18.0,
"eval_accuracy": 0.37317784256559766,
"eval_loss": 1.6329833269119263,
"eval_macro_f1": 0.1832569421283258,
"eval_runtime": 0.0773,
"eval_samples_per_second": 8871.989,
"eval_steps_per_second": 142.262,
"step": 1566
},
{
"epoch": 19.0,
"grad_norm": 4.254021167755127,
"learning_rate": 1.9041734901788285e-05,
"loss": 1.6776,
"step": 1653
},
{
"epoch": 19.0,
"eval_accuracy": 0.3760932944606414,
"eval_loss": 1.6269856691360474,
"eval_macro_f1": 0.18890492604023376,
"eval_runtime": 0.0793,
"eval_samples_per_second": 8655.516,
"eval_steps_per_second": 138.791,
"step": 1653
},
{
"epoch": 20.0,
"grad_norm": 3.7426421642303467,
"learning_rate": 1.8894358306898934e-05,
"loss": 1.6651,
"step": 1740
},
{
"epoch": 20.0,
"eval_accuracy": 0.37900874635568516,
"eval_loss": 1.621616005897522,
"eval_macro_f1": 0.1934068278580951,
"eval_runtime": 0.0784,
"eval_samples_per_second": 8746.314,
"eval_steps_per_second": 140.247,
"step": 1740
},
{
"epoch": 21.0,
"grad_norm": 4.341787338256836,
"learning_rate": 1.8737101074690274e-05,
"loss": 1.6694,
"step": 1827
},
{
"epoch": 21.0,
"eval_accuracy": 0.38338192419825073,
"eval_loss": 1.617226243019104,
"eval_macro_f1": 0.19340109033111008,
"eval_runtime": 0.0763,
"eval_samples_per_second": 8984.969,
"eval_steps_per_second": 144.074,
"step": 1827
},
{
"epoch": 22.0,
"grad_norm": 4.18576717376709,
"learning_rate": 1.8570137900359382e-05,
"loss": 1.6561,
"step": 1914
},
{
"epoch": 22.0,
"eval_accuracy": 0.38338192419825073,
"eval_loss": 1.6133029460906982,
"eval_macro_f1": 0.19942474851997433,
"eval_runtime": 0.0768,
"eval_samples_per_second": 8932.362,
"eval_steps_per_second": 143.23,
"step": 1914
},
{
"epoch": 23.0,
"grad_norm": 4.433280944824219,
"learning_rate": 1.8393654261319504e-05,
"loss": 1.6456,
"step": 2001
},
{
"epoch": 23.0,
"eval_accuracy": 0.3877551020408163,
"eval_loss": 1.6075658798217773,
"eval_macro_f1": 0.2021179986320824,
"eval_runtime": 0.0779,
"eval_samples_per_second": 8804.876,
"eval_steps_per_second": 141.186,
"step": 2001
},
{
"epoch": 24.0,
"grad_norm": 3.650712490081787,
"learning_rate": 1.8207846211155388e-05,
"loss": 1.6412,
"step": 2088
},
{
"epoch": 24.0,
"eval_accuracy": 0.39212827988338195,
"eval_loss": 1.6046576499938965,
"eval_macro_f1": 0.20558065728483735,
"eval_runtime": 0.0777,
"eval_samples_per_second": 8832.472,
"eval_steps_per_second": 141.629,
"step": 2088
},
{
"epoch": 25.0,
"grad_norm": 3.7270474433898926,
"learning_rate": 1.8012920161829693e-05,
"loss": 1.6369,
"step": 2175
},
{
"epoch": 25.0,
"eval_accuracy": 0.39504373177842567,
"eval_loss": 1.6002745628356934,
"eval_macro_f1": 0.2085329794328549,
"eval_runtime": 0.0763,
"eval_samples_per_second": 8986.091,
"eval_steps_per_second": 144.092,
"step": 2175
},
{
"epoch": 26.0,
"grad_norm": 3.5878360271453857,
"learning_rate": 1.7809092654382368e-05,
"loss": 1.6141,
"step": 2262
},
{
"epoch": 26.0,
"eval_accuracy": 0.39941690962099125,
"eval_loss": 1.5953983068466187,
"eval_macro_f1": 0.21136043336239665,
"eval_runtime": 0.0767,
"eval_samples_per_second": 8941.745,
"eval_steps_per_second": 143.381,
"step": 2262
},
{
"epoch": 27.0,
"grad_norm": 3.669312000274658,
"learning_rate": 1.7596590118377787e-05,
"loss": 1.5989,
"step": 2349
},
{
"epoch": 27.0,
"eval_accuracy": 0.40233236151603496,
"eval_loss": 1.5911133289337158,
"eval_macro_f1": 0.21358021621926357,
"eval_runtime": 0.0772,
"eval_samples_per_second": 8884.756,
"eval_steps_per_second": 142.467,
"step": 2349
},
{
"epoch": 28.0,
"grad_norm": 3.686958074569702,
"learning_rate": 1.7375648620366817e-05,
"loss": 1.6096,
"step": 2436
},
{
"epoch": 28.0,
"eval_accuracy": 0.40524781341107874,
"eval_loss": 1.5873298645019531,
"eval_macro_f1": 0.214485741970254,
"eval_runtime": 0.08,
"eval_samples_per_second": 8573.194,
"eval_steps_per_second": 137.471,
"step": 2436
},
{
"epoch": 29.0,
"grad_norm": 3.553083896636963,
"learning_rate": 1.7146513601643282e-05,
"loss": 1.6039,
"step": 2523
},
{
"epoch": 29.0,
"eval_accuracy": 0.4067055393586006,
"eval_loss": 1.584189534187317,
"eval_macro_f1": 0.21667857809163207,
"eval_runtime": 0.0762,
"eval_samples_per_second": 9000.399,
"eval_steps_per_second": 144.321,
"step": 2523
},
{
"epoch": 30.0,
"grad_norm": 3.9078423976898193,
"learning_rate": 1.6909439605586156e-05,
"loss": 1.5928,
"step": 2610
},
{
"epoch": 30.0,
"eval_accuracy": 0.40816326530612246,
"eval_loss": 1.579264521598816,
"eval_macro_f1": 0.21831730879606145,
"eval_runtime": 0.0775,
"eval_samples_per_second": 8854.652,
"eval_steps_per_second": 141.984,
"step": 2610
},
{
"epoch": 31.0,
"grad_norm": 3.7723805904388428,
"learning_rate": 1.6664689994890307e-05,
"loss": 1.5824,
"step": 2697
},
{
"epoch": 31.0,
"eval_accuracy": 0.40816326530612246,
"eval_loss": 1.5762993097305298,
"eval_macro_f1": 0.21682052505544805,
"eval_runtime": 0.0776,
"eval_samples_per_second": 8835.265,
"eval_steps_per_second": 141.673,
"step": 2697
},
{
"epoch": 32.0,
"grad_norm": 4.051678657531738,
"learning_rate": 1.641253665900002e-05,
"loss": 1.5877,
"step": 2784
},
{
"epoch": 32.0,
"eval_accuracy": 0.41545189504373176,
"eval_loss": 1.5732570886611938,
"eval_macro_f1": 0.2262251950436546,
"eval_runtime": 0.0769,
"eval_samples_per_second": 8918.878,
"eval_steps_per_second": 143.014,
"step": 2784
},
{
"epoch": 33.0,
"grad_norm": 3.396827459335327,
"learning_rate": 1.6153259712070225e-05,
"loss": 1.5722,
"step": 2871
},
{
"epoch": 33.0,
"eval_accuracy": 0.4110787172011662,
"eval_loss": 1.5706168413162231,
"eval_macro_f1": 0.22060087456248262,
"eval_runtime": 0.0769,
"eval_samples_per_second": 8923.138,
"eval_steps_per_second": 143.082,
"step": 2871
},
{
"epoch": 34.0,
"grad_norm": 3.510072708129883,
"learning_rate": 1.5887147181791e-05,
"loss": 1.5649,
"step": 2958
},
{
"epoch": 34.0,
"eval_accuracy": 0.41690962099125367,
"eval_loss": 1.5673753023147583,
"eval_macro_f1": 0.2265284337566022,
"eval_runtime": 0.0781,
"eval_samples_per_second": 8778.335,
"eval_steps_per_second": 140.76,
"step": 2958
},
{
"epoch": 35.0,
"grad_norm": 3.531944513320923,
"learning_rate": 1.5614494689421032e-05,
"loss": 1.5662,
"step": 3045
},
{
"epoch": 35.0,
"eval_accuracy": 0.4227405247813411,
"eval_loss": 1.5635616779327393,
"eval_macro_f1": 0.23237846476317717,
"eval_runtime": 0.0769,
"eval_samples_per_second": 8921.948,
"eval_steps_per_second": 143.063,
"step": 3045
},
{
"epoch": 36.0,
"grad_norm": 3.724010944366455,
"learning_rate": 1.533560512138543e-05,
"loss": 1.5545,
"step": 3132
},
{
"epoch": 36.0,
"eval_accuracy": 0.42419825072886297,
"eval_loss": 1.5617172718048096,
"eval_macro_f1": 0.23396270153240778,
"eval_runtime": 0.0766,
"eval_samples_per_second": 8960.737,
"eval_steps_per_second": 143.685,
"step": 3132
},
{
"epoch": 37.0,
"grad_norm": 3.6395723819732666,
"learning_rate": 1.5050788292802812e-05,
"loss": 1.5416,
"step": 3219
},
{
"epoch": 37.0,
"eval_accuracy": 0.43440233236151604,
"eval_loss": 1.5581672191619873,
"eval_macro_f1": 0.24389742844346657,
"eval_runtime": 0.0768,
"eval_samples_per_second": 8935.719,
"eval_steps_per_second": 143.284,
"step": 3219
},
{
"epoch": 38.0,
"grad_norm": 4.1144866943359375,
"learning_rate": 1.4760360603315362e-05,
"loss": 1.5351,
"step": 3306
},
{
"epoch": 38.0,
"eval_accuracy": 0.4329446064139942,
"eval_loss": 1.55453622341156,
"eval_macro_f1": 0.23991444298311637,
"eval_runtime": 0.0769,
"eval_samples_per_second": 8923.913,
"eval_steps_per_second": 143.095,
"step": 3306
},
{
"epoch": 39.0,
"grad_norm": 3.656245708465576,
"learning_rate": 1.4464644685604184e-05,
"loss": 1.5424,
"step": 3393
},
{
"epoch": 39.0,
"eval_accuracy": 0.4329446064139942,
"eval_loss": 1.5531222820281982,
"eval_macro_f1": 0.24107844449857171,
"eval_runtime": 0.0777,
"eval_samples_per_second": 8833.123,
"eval_steps_per_second": 141.639,
"step": 3393
},
{
"epoch": 40.0,
"grad_norm": 3.530606746673584,
"learning_rate": 1.41639690469805e-05,
"loss": 1.5232,
"step": 3480
},
{
"epoch": 40.0,
"eval_accuracy": 0.4329446064139942,
"eval_loss": 1.5503716468811035,
"eval_macro_f1": 0.24171834592844124,
"eval_runtime": 0.0767,
"eval_samples_per_second": 8939.189,
"eval_steps_per_second": 143.34,
"step": 3480
},
{
"epoch": 41.0,
"grad_norm": 3.4572715759277344,
"learning_rate": 1.3858667704450763e-05,
"loss": 1.5277,
"step": 3567
},
{
"epoch": 41.0,
"eval_accuracy": 0.43440233236151604,
"eval_loss": 1.5470139980316162,
"eval_macro_f1": 0.24209898836089624,
"eval_runtime": 0.0769,
"eval_samples_per_second": 8921.395,
"eval_steps_per_second": 143.054,
"step": 3567
},
{
"epoch": 42.0,
"grad_norm": 4.217586517333984,
"learning_rate": 1.3549079813661123e-05,
"loss": 1.5112,
"step": 3654
},
{
"epoch": 42.0,
"eval_accuracy": 0.4446064139941691,
"eval_loss": 1.5440438985824585,
"eval_macro_f1": 0.25175856147050574,
"eval_runtime": 0.0767,
"eval_samples_per_second": 8948.113,
"eval_steps_per_second": 143.483,
"step": 3654
}
],
"logging_steps": 87,
"max_steps": 8700,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}