emotion-visobert / trainer_state.json
AnnyNguyen's picture
Upload trainer_state.json with huggingface_hub
a720df4 verified
{
"best_global_step": 3654,
"best_metric": 0.634271615758239,
"best_model_checkpoint": "outputs/visobert/checkpoint-3654",
"epoch": 21.0,
"eval_steps": 500,
"global_step": 3654,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9942528735632183,
"grad_norm": 4.714195728302002,
"learning_rate": 6.88e-06,
"loss": 1.7365,
"step": 173
},
{
"epoch": 1.0,
"eval_accuracy": 0.5495626822157434,
"eval_loss": 1.3077976703643799,
"eval_macro_f1": 0.3621719465508691,
"eval_runtime": 1.0013,
"eval_samples_per_second": 685.088,
"eval_steps_per_second": 21.971,
"step": 174
},
{
"epoch": 1.9885057471264367,
"grad_norm": 7.385402679443359,
"learning_rate": 1.38e-05,
"loss": 1.0546,
"step": 346
},
{
"epoch": 2.0,
"eval_accuracy": 0.6501457725947521,
"eval_loss": 0.9251817464828491,
"eval_macro_f1": 0.5626337451346989,
"eval_runtime": 1.017,
"eval_samples_per_second": 674.552,
"eval_steps_per_second": 21.633,
"step": 348
},
{
"epoch": 2.9827586206896552,
"grad_norm": 6.6963605880737305,
"learning_rate": 1.999994401896218e-05,
"loss": 0.6967,
"step": 519
},
{
"epoch": 3.0,
"eval_accuracy": 0.6690962099125365,
"eval_loss": 0.9223118424415588,
"eval_macro_f1": 0.5955069733934168,
"eval_runtime": 1.0189,
"eval_samples_per_second": 673.298,
"eval_steps_per_second": 21.593,
"step": 522
},
{
"epoch": 3.9770114942528734,
"grad_norm": 5.811980724334717,
"learning_rate": 1.9993697433304146e-05,
"loss": 0.3928,
"step": 692
},
{
"epoch": 4.0,
"eval_accuracy": 0.6603498542274052,
"eval_loss": 1.0274471044540405,
"eval_macro_f1": 0.6117238630327387,
"eval_runtime": 1.0201,
"eval_samples_per_second": 672.464,
"eval_steps_per_second": 21.566,
"step": 696
},
{
"epoch": 4.971264367816092,
"grad_norm": 16.778417587280273,
"learning_rate": 1.9977115923137912e-05,
"loss": 0.1721,
"step": 865
},
{
"epoch": 5.0,
"eval_accuracy": 0.6530612244897959,
"eval_loss": 1.358595371246338,
"eval_macro_f1": 0.5994376771063253,
"eval_runtime": 1.0217,
"eval_samples_per_second": 671.439,
"eval_steps_per_second": 21.533,
"step": 870
},
{
"epoch": 5.9655172413793105,
"grad_norm": 4.333595275878906,
"learning_rate": 1.995021663613649e-05,
"loss": 0.0717,
"step": 1038
},
{
"epoch": 6.0,
"eval_accuracy": 0.6516034985422741,
"eval_loss": 1.6116644144058228,
"eval_macro_f1": 0.5975804141258729,
"eval_runtime": 1.0219,
"eval_samples_per_second": 671.317,
"eval_steps_per_second": 21.529,
"step": 1044
},
{
"epoch": 6.959770114942529,
"grad_norm": 10.536818504333496,
"learning_rate": 1.991302739004225e-05,
"loss": 0.0408,
"step": 1211
},
{
"epoch": 7.0,
"eval_accuracy": 0.6661807580174927,
"eval_loss": 1.761244297027588,
"eval_macro_f1": 0.6135654899117211,
"eval_runtime": 1.0232,
"eval_samples_per_second": 670.433,
"eval_steps_per_second": 21.501,
"step": 1218
},
{
"epoch": 7.954022988505747,
"grad_norm": 0.06934899091720581,
"learning_rate": 1.9865586643899352e-05,
"loss": 0.0189,
"step": 1384
},
{
"epoch": 8.0,
"eval_accuracy": 0.6545189504373178,
"eval_loss": 2.068016767501831,
"eval_macro_f1": 0.608371709261751,
"eval_runtime": 1.0251,
"eval_samples_per_second": 669.232,
"eval_steps_per_second": 21.462,
"step": 1392
},
{
"epoch": 8.948275862068966,
"grad_norm": 0.2869434654712677,
"learning_rate": 1.9807943458281543e-05,
"loss": 0.01,
"step": 1557
},
{
"epoch": 9.0,
"eval_accuracy": 0.6618075801749271,
"eval_loss": 2.1802825927734375,
"eval_macro_f1": 0.6124464449897763,
"eval_runtime": 1.0247,
"eval_samples_per_second": 669.457,
"eval_steps_per_second": 21.469,
"step": 1566
},
{
"epoch": 9.942528735632184,
"grad_norm": 0.07617083936929703,
"learning_rate": 1.974015744455647e-05,
"loss": 0.0089,
"step": 1730
},
{
"epoch": 10.0,
"eval_accuracy": 0.6559766763848397,
"eval_loss": 2.3100554943084717,
"eval_macro_f1": 0.6074241333392209,
"eval_runtime": 1.0231,
"eval_samples_per_second": 670.506,
"eval_steps_per_second": 21.503,
"step": 1740
},
{
"epoch": 10.936781609195402,
"grad_norm": 4.301032066345215,
"learning_rate": 1.9662298703238904e-05,
"loss": 0.0087,
"step": 1903
},
{
"epoch": 11.0,
"eval_accuracy": 0.6530612244897959,
"eval_loss": 2.3289756774902344,
"eval_macro_f1": 0.6128782018168504,
"eval_runtime": 1.0248,
"eval_samples_per_second": 669.387,
"eval_steps_per_second": 21.467,
"step": 1914
},
{
"epoch": 11.931034482758621,
"grad_norm": 0.050387851893901825,
"learning_rate": 1.9574447751496706e-05,
"loss": 0.0095,
"step": 2076
},
{
"epoch": 12.0,
"eval_accuracy": 0.6676384839650146,
"eval_loss": 2.365588903427124,
"eval_macro_f1": 0.6232160117095836,
"eval_runtime": 1.0257,
"eval_samples_per_second": 668.809,
"eval_steps_per_second": 21.449,
"step": 2088
},
{
"epoch": 12.925287356321839,
"grad_norm": 0.19025640189647675,
"learning_rate": 1.9476695439884466e-05,
"loss": 0.0082,
"step": 2249
},
{
"epoch": 13.0,
"eval_accuracy": 0.6472303206997084,
"eval_loss": 2.510204553604126,
"eval_macro_f1": 0.6009794079827844,
"eval_runtime": 1.0241,
"eval_samples_per_second": 669.83,
"eval_steps_per_second": 21.481,
"step": 2262
},
{
"epoch": 13.919540229885058,
"grad_norm": 0.009207348339259624,
"learning_rate": 1.9369142858390936e-05,
"loss": 0.0077,
"step": 2422
},
{
"epoch": 14.0,
"eval_accuracy": 0.6632653061224489,
"eval_loss": 2.5246763229370117,
"eval_macro_f1": 0.6098772695075174,
"eval_runtime": 1.0232,
"eval_samples_per_second": 670.437,
"eval_steps_per_second": 21.501,
"step": 2436
},
{
"epoch": 14.913793103448276,
"grad_norm": 0.04841063916683197,
"learning_rate": 1.9251901231897364e-05,
"loss": 0.0087,
"step": 2595
},
{
"epoch": 15.0,
"eval_accuracy": 0.6618075801749271,
"eval_loss": 2.6046578884124756,
"eval_macro_f1": 0.5938506871223318,
"eval_runtime": 1.0232,
"eval_samples_per_second": 670.439,
"eval_steps_per_second": 21.501,
"step": 2610
},
{
"epoch": 15.908045977011493,
"grad_norm": 0.01879715360701084,
"learning_rate": 1.9125091805154977e-05,
"loss": 0.0074,
"step": 2768
},
{
"epoch": 16.0,
"eval_accuracy": 0.6603498542274052,
"eval_loss": 2.5505387783050537,
"eval_macro_f1": 0.62519263706606,
"eval_runtime": 1.0233,
"eval_samples_per_second": 670.404,
"eval_steps_per_second": 21.5,
"step": 2784
},
{
"epoch": 16.902298850574713,
"grad_norm": 0.006341532338410616,
"learning_rate": 1.8988845717400375e-05,
"loss": 0.0078,
"step": 2941
},
{
"epoch": 17.0,
"eval_accuracy": 0.6603498542274052,
"eval_loss": 2.581911563873291,
"eval_macro_f1": 0.6102357854686191,
"eval_runtime": 1.0227,
"eval_samples_per_second": 670.775,
"eval_steps_per_second": 21.512,
"step": 2958
},
{
"epoch": 17.896551724137932,
"grad_norm": 13.61514949798584,
"learning_rate": 1.884330386673869e-05,
"loss": 0.0158,
"step": 3114
},
{
"epoch": 18.0,
"eval_accuracy": 0.6530612244897959,
"eval_loss": 2.585244655609131,
"eval_macro_f1": 0.6128145704869843,
"eval_runtime": 1.0227,
"eval_samples_per_second": 670.768,
"eval_steps_per_second": 21.512,
"step": 3132
},
{
"epoch": 18.89080459770115,
"grad_norm": 22.37177085876465,
"learning_rate": 1.8688616764434577e-05,
"loss": 0.015,
"step": 3287
},
{
"epoch": 19.0,
"eval_accuracy": 0.6443148688046647,
"eval_loss": 2.7335152626037598,
"eval_macro_f1": 0.6043926085724337,
"eval_runtime": 1.0218,
"eval_samples_per_second": 671.336,
"eval_steps_per_second": 21.53,
"step": 3306
},
{
"epoch": 19.885057471264368,
"grad_norm": 0.02293994091451168,
"learning_rate": 1.8524944379261892e-05,
"loss": 0.0118,
"step": 3460
},
{
"epoch": 20.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 2.8105897903442383,
"eval_macro_f1": 0.6045183468357809,
"eval_runtime": 1.0238,
"eval_samples_per_second": 670.049,
"eval_steps_per_second": 21.488,
"step": 3480
},
{
"epoch": 20.879310344827587,
"grad_norm": 17.96685791015625,
"learning_rate": 1.8352455972072908e-05,
"loss": 0.0087,
"step": 3633
},
{
"epoch": 21.0,
"eval_accuracy": 0.6749271137026239,
"eval_loss": 2.626377582550049,
"eval_macro_f1": 0.634271615758239,
"eval_runtime": 1.0224,
"eval_samples_per_second": 670.996,
"eval_steps_per_second": 21.519,
"step": 3654
}
],
"logging_steps": 173,
"max_steps": 17400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.04182224766452e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}