grey-multilabel-classification / trainer_state.json
Saripudin's picture
Upload folder using huggingface_hub
0169065 verified
{
"best_metric": 0.017690911889076233,
"best_model_checkpoint": "grey-multilabel-classification-3/checkpoint-10720",
"epoch": 18.0,
"eval_steps": 500,
"global_step": 19296,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4664179104477612,
"grad_norm": 0.3367554545402527,
"learning_rate": 1.953358208955224e-05,
"loss": 0.0801,
"step": 500
},
{
"epoch": 0.9328358208955224,
"grad_norm": 0.2535526752471924,
"learning_rate": 1.9067164179104477e-05,
"loss": 0.0452,
"step": 1000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9954128111840032,
"eval_f1": 0.0,
"eval_loss": 0.021581802517175674,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 13.6235,
"eval_samples_per_second": 39.344,
"eval_steps_per_second": 39.344,
"step": 1072
},
{
"epoch": 1.3992537313432836,
"grad_norm": 0.39555880427360535,
"learning_rate": 1.860074626865672e-05,
"loss": 0.0429,
"step": 1500
},
{
"epoch": 1.8656716417910446,
"grad_norm": 0.2884560227394104,
"learning_rate": 1.8134328358208956e-05,
"loss": 0.0433,
"step": 2000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9955238204371616,
"eval_f1": 0.0713224368499257,
"eval_loss": 0.020623503252863884,
"eval_precision": 0.7384615384615385,
"eval_recall": 0.03747072599531616,
"eval_runtime": 13.1268,
"eval_samples_per_second": 40.832,
"eval_steps_per_second": 40.832,
"step": 2144
},
{
"epoch": 2.332089552238806,
"grad_norm": 0.31138914823532104,
"learning_rate": 1.7667910447761197e-05,
"loss": 0.0414,
"step": 2500
},
{
"epoch": 2.798507462686567,
"grad_norm": 0.28297460079193115,
"learning_rate": 1.7201492537313432e-05,
"loss": 0.0401,
"step": 3000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9957243532815767,
"eval_f1": 0.19973190348525469,
"eval_loss": 0.019770797342061996,
"eval_precision": 0.7061611374407583,
"eval_recall": 0.11631537861046058,
"eval_runtime": 13.0974,
"eval_samples_per_second": 40.924,
"eval_steps_per_second": 40.924,
"step": 3216
},
{
"epoch": 3.264925373134328,
"grad_norm": 0.17224232852458954,
"learning_rate": 1.6735074626865673e-05,
"loss": 0.0388,
"step": 3500
},
{
"epoch": 3.7313432835820897,
"grad_norm": 0.23225364089012146,
"learning_rate": 1.626865671641791e-05,
"loss": 0.0381,
"step": 4000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9958532672529865,
"eval_f1": 0.28606658446362515,
"eval_loss": 0.01913285069167614,
"eval_precision": 0.6803519061583577,
"eval_recall": 0.18110850897736144,
"eval_runtime": 13.086,
"eval_samples_per_second": 40.96,
"eval_steps_per_second": 40.96,
"step": 4288
},
{
"epoch": 4.197761194029851,
"grad_norm": 0.2774255573749542,
"learning_rate": 1.5802238805970152e-05,
"loss": 0.0362,
"step": 4500
},
{
"epoch": 4.664179104477612,
"grad_norm": 0.34336161613464355,
"learning_rate": 1.533582089552239e-05,
"loss": 0.0348,
"step": 5000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9959105623513909,
"eval_f1": 0.29244114002478316,
"eval_loss": 0.01866454817354679,
"eval_precision": 0.7087087087087087,
"eval_recall": 0.18423106947697113,
"eval_runtime": 13.0537,
"eval_samples_per_second": 41.061,
"eval_steps_per_second": 41.061,
"step": 5360
},
{
"epoch": 5.130597014925373,
"grad_norm": 0.2382318079471588,
"learning_rate": 1.4869402985074627e-05,
"loss": 0.0349,
"step": 5500
},
{
"epoch": 5.597014925373134,
"grad_norm": 0.20123334228992462,
"learning_rate": 1.4402985074626867e-05,
"loss": 0.0334,
"step": 6000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9959177242386914,
"eval_f1": 0.30317848410757947,
"eval_loss": 0.018318546935915947,
"eval_precision": 0.6985915492957746,
"eval_recall": 0.19359875097580015,
"eval_runtime": 13.2299,
"eval_samples_per_second": 40.514,
"eval_steps_per_second": 40.514,
"step": 6432
},
{
"epoch": 6.063432835820896,
"grad_norm": 0.2717812657356262,
"learning_rate": 1.3936567164179106e-05,
"loss": 0.0334,
"step": 6500
},
{
"epoch": 6.529850746268656,
"grad_norm": 0.2352936565876007,
"learning_rate": 1.3470149253731344e-05,
"loss": 0.0314,
"step": 7000
},
{
"epoch": 6.996268656716418,
"grad_norm": 0.3104737401008606,
"learning_rate": 1.3003731343283584e-05,
"loss": 0.0314,
"step": 7500
},
{
"epoch": 7.0,
"eval_accuracy": 0.9959606955624947,
"eval_f1": 0.3079754601226994,
"eval_loss": 0.018033917993307114,
"eval_precision": 0.7191977077363897,
"eval_recall": 0.1959406713505074,
"eval_runtime": 13.8378,
"eval_samples_per_second": 38.734,
"eval_steps_per_second": 38.734,
"step": 7504
},
{
"epoch": 7.462686567164179,
"grad_norm": 0.23265156149864197,
"learning_rate": 1.2537313432835823e-05,
"loss": 0.0295,
"step": 8000
},
{
"epoch": 7.92910447761194,
"grad_norm": 0.2608203887939453,
"learning_rate": 1.207089552238806e-05,
"loss": 0.0292,
"step": 8500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9960358953791503,
"eval_f1": 0.351493848857645,
"eval_loss": 0.0177922360599041,
"eval_precision": 0.704225352112676,
"eval_recall": 0.234192037470726,
"eval_runtime": 12.9508,
"eval_samples_per_second": 41.387,
"eval_steps_per_second": 41.387,
"step": 8576
},
{
"epoch": 8.395522388059701,
"grad_norm": 0.28066638112068176,
"learning_rate": 1.1604477611940299e-05,
"loss": 0.0285,
"step": 9000
},
{
"epoch": 8.861940298507463,
"grad_norm": 0.3340807259082794,
"learning_rate": 1.1138059701492538e-05,
"loss": 0.0284,
"step": 9500
},
{
"epoch": 9.0,
"eval_accuracy": 0.9960573810410519,
"eval_f1": 0.3697767601602748,
"eval_loss": 0.017844857648015022,
"eval_precision": 0.6931330472103004,
"eval_recall": 0.25214676034348166,
"eval_runtime": 13.5821,
"eval_samples_per_second": 39.464,
"eval_steps_per_second": 39.464,
"step": 9648
},
{
"epoch": 9.328358208955224,
"grad_norm": 0.1388923078775406,
"learning_rate": 1.0671641791044778e-05,
"loss": 0.0275,
"step": 10000
},
{
"epoch": 9.794776119402986,
"grad_norm": 0.200577512383461,
"learning_rate": 1.0205223880597015e-05,
"loss": 0.0265,
"step": 10500
},
{
"epoch": 10.0,
"eval_accuracy": 0.9961039333085054,
"eval_f1": 0.3652275379229872,
"eval_loss": 0.017690911889076233,
"eval_precision": 0.7228637413394919,
"eval_recall": 0.24434035909445745,
"eval_runtime": 13.8518,
"eval_samples_per_second": 38.695,
"eval_steps_per_second": 38.695,
"step": 10720
},
{
"epoch": 10.261194029850746,
"grad_norm": 0.1815371811389923,
"learning_rate": 9.738805970149255e-06,
"loss": 0.0261,
"step": 11000
},
{
"epoch": 10.727611940298507,
"grad_norm": 0.21693415939807892,
"learning_rate": 9.272388059701494e-06,
"loss": 0.0251,
"step": 11500
},
{
"epoch": 11.0,
"eval_accuracy": 0.9959857621680466,
"eval_f1": 0.37825845812534664,
"eval_loss": 0.018003830686211586,
"eval_precision": 0.6532567049808429,
"eval_recall": 0.26619828259172523,
"eval_runtime": 13.2669,
"eval_samples_per_second": 40.401,
"eval_steps_per_second": 40.401,
"step": 11792
},
{
"epoch": 11.194029850746269,
"grad_norm": 0.2108106017112732,
"learning_rate": 8.805970149253732e-06,
"loss": 0.024,
"step": 12000
},
{
"epoch": 11.66044776119403,
"grad_norm": 0.3812670409679413,
"learning_rate": 8.339552238805972e-06,
"loss": 0.0245,
"step": 12500
},
{
"epoch": 12.0,
"eval_accuracy": 0.9960215716045492,
"eval_f1": 0.3984840281537629,
"eval_loss": 0.017995502799749374,
"eval_precision": 0.6501766784452296,
"eval_recall": 0.28727556596409054,
"eval_runtime": 14.2572,
"eval_samples_per_second": 37.595,
"eval_steps_per_second": 37.595,
"step": 12864
},
{
"epoch": 12.126865671641792,
"grad_norm": 0.1880696564912796,
"learning_rate": 7.87313432835821e-06,
"loss": 0.0235,
"step": 13000
},
{
"epoch": 12.593283582089553,
"grad_norm": 0.1416264921426773,
"learning_rate": 7.406716417910448e-06,
"loss": 0.0227,
"step": 13500
},
{
"epoch": 13.0,
"eval_accuracy": 0.996071704815653,
"eval_f1": 0.3989041095890411,
"eval_loss": 0.018045414239168167,
"eval_precision": 0.6691176470588235,
"eval_recall": 0.28415300546448086,
"eval_runtime": 14.1382,
"eval_samples_per_second": 37.912,
"eval_steps_per_second": 37.912,
"step": 13936
},
{
"epoch": 13.059701492537313,
"grad_norm": 0.10282892733812332,
"learning_rate": 6.9402985074626876e-06,
"loss": 0.0231,
"step": 14000
},
{
"epoch": 13.526119402985074,
"grad_norm": 0.22511619329452515,
"learning_rate": 6.473880597014925e-06,
"loss": 0.0225,
"step": 14500
},
{
"epoch": 13.992537313432836,
"grad_norm": 0.2805801331996918,
"learning_rate": 6.007462686567165e-06,
"loss": 0.0223,
"step": 15000
},
{
"epoch": 14.0,
"eval_accuracy": 0.9960108287735984,
"eval_f1": 0.40171858216971,
"eval_loss": 0.018198266625404358,
"eval_precision": 0.6437177280550774,
"eval_recall": 0.29195940671350507,
"eval_runtime": 13.2494,
"eval_samples_per_second": 40.455,
"eval_steps_per_second": 40.455,
"step": 15008
},
{
"epoch": 14.458955223880597,
"grad_norm": 0.2002527117729187,
"learning_rate": 5.5410447761194035e-06,
"loss": 0.0211,
"step": 15500
},
{
"epoch": 14.925373134328359,
"grad_norm": 0.050380606204271317,
"learning_rate": 5.074626865671642e-06,
"loss": 0.0214,
"step": 16000
},
{
"epoch": 15.0,
"eval_accuracy": 0.9960860285902541,
"eval_f1": 0.40630092341118956,
"eval_loss": 0.018185345456004143,
"eval_precision": 0.6678571428571428,
"eval_recall": 0.29195940671350507,
"eval_runtime": 13.1752,
"eval_samples_per_second": 40.683,
"eval_steps_per_second": 40.683,
"step": 16080
},
{
"epoch": 15.39179104477612,
"grad_norm": 0.29372188448905945,
"learning_rate": 4.6082089552238816e-06,
"loss": 0.0205,
"step": 16500
},
{
"epoch": 15.85820895522388,
"grad_norm": 0.145236998796463,
"learning_rate": 4.141791044776119e-06,
"loss": 0.0213,
"step": 17000
},
{
"epoch": 16.0,
"eval_accuracy": 0.9960573810410519,
"eval_f1": 0.40582838640043173,
"eval_loss": 0.018394598737359047,
"eval_precision": 0.6573426573426573,
"eval_recall": 0.2935206869633099,
"eval_runtime": 13.885,
"eval_samples_per_second": 38.603,
"eval_steps_per_second": 38.603,
"step": 17152
},
{
"epoch": 16.324626865671643,
"grad_norm": 0.3503226637840271,
"learning_rate": 3.6753731343283584e-06,
"loss": 0.0205,
"step": 17500
},
{
"epoch": 16.791044776119403,
"grad_norm": 0.22795088589191437,
"learning_rate": 3.208955223880597e-06,
"loss": 0.0198,
"step": 18000
},
{
"epoch": 17.0,
"eval_accuracy": 0.9960108287735984,
"eval_f1": 0.40807651434643993,
"eval_loss": 0.018566885963082314,
"eval_precision": 0.6389351081530782,
"eval_recall": 0.2997658079625293,
"eval_runtime": 13.6531,
"eval_samples_per_second": 39.259,
"eval_steps_per_second": 39.259,
"step": 18224
},
{
"epoch": 17.257462686567163,
"grad_norm": 0.09603070467710495,
"learning_rate": 2.742537313432836e-06,
"loss": 0.0198,
"step": 18500
},
{
"epoch": 17.723880597014926,
"grad_norm": 0.3136885166168213,
"learning_rate": 2.2761194029850747e-06,
"loss": 0.0197,
"step": 19000
},
{
"epoch": 18.0,
"eval_accuracy": 0.9960538000974016,
"eval_f1": 0.42,
"eval_loss": 0.018600279465317726,
"eval_precision": 0.6445880452342488,
"eval_recall": 0.3114754098360656,
"eval_runtime": 12.9973,
"eval_samples_per_second": 41.239,
"eval_steps_per_second": 41.239,
"step": 19296
}
],
"logging_steps": 500,
"max_steps": 21440,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2579840201687040.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}