{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 1000,
"global_step": 5600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17857142857142858,
"grad_norm": 36795.6953125,
"learning_rate": 1.785714285714286e-05,
"loss": 11.5686,
"step": 100
},
{
"epoch": 0.35714285714285715,
"grad_norm": 15962.9775390625,
"learning_rate": 3.571428571428572e-05,
"loss": 6.6701,
"step": 200
},
{
"epoch": 0.5357142857142857,
"grad_norm": 22162.76953125,
"learning_rate": 4.999825642177387e-05,
"loss": 4.7406,
"step": 300
},
{
"epoch": 0.7142857142857143,
"grad_norm": 22004.275390625,
"learning_rate": 4.9937256716606394e-05,
"loss": 4.7143,
"step": 400
},
{
"epoch": 0.8928571428571429,
"grad_norm": 19477.404296875,
"learning_rate": 4.978932115289164e-05,
"loss": 4.6796,
"step": 500
},
{
"epoch": 1.0714285714285714,
"grad_norm": 20002.865234375,
"learning_rate": 4.955496546118439e-05,
"loss": 4.6291,
"step": 600
},
{
"epoch": 1.25,
"grad_norm": 19484.634765625,
"learning_rate": 4.923500664848326e-05,
"loss": 4.58,
"step": 700
},
{
"epoch": 1.4285714285714286,
"grad_norm": 18751.654296875,
"learning_rate": 4.883056014999423e-05,
"loss": 4.5509,
"step": 800
},
{
"epoch": 1.6071428571428572,
"grad_norm": 18834.03515625,
"learning_rate": 4.834303594051854e-05,
"loss": 4.5004,
"step": 900
},
{
"epoch": 1.7857142857142856,
"grad_norm": 17574.287109375,
"learning_rate": 4.7774133619021514e-05,
"loss": 4.4528,
"step": 1000
},
{
"epoch": 1.7857142857142856,
"eval_cer_score": 0.996767055081083,
"eval_loss": 4.664954662322998,
"eval_runtime": 7.0528,
"eval_samples_per_second": 35.447,
"eval_steps_per_second": 5.955,
"eval_wer_score": 1.0,
"step": 1000
},
{
"epoch": 1.9642857142857144,
"grad_norm": 17599.75,
"learning_rate": 4.7125836483518276e-05,
"loss": 4.4168,
"step": 1100
},
{
"epoch": 2.142857142857143,
"grad_norm": 15682.912109375,
"learning_rate": 4.6400404616932505e-05,
"loss": 4.3702,
"step": 1200
},
{
"epoch": 2.3214285714285716,
"grad_norm": 17537.24609375,
"learning_rate": 4.560036700803213e-05,
"loss": 4.3231,
"step": 1300
},
{
"epoch": 2.5,
"grad_norm": 18234.52734375,
"learning_rate": 4.4728512734909844e-05,
"loss": 4.2926,
"step": 1400
},
{
"epoch": 2.678571428571429,
"grad_norm": 18046.029296875,
"learning_rate": 4.378788124174441e-05,
"loss": 4.2644,
"step": 1500
},
{
"epoch": 2.857142857142857,
"grad_norm": 17269.84765625,
"learning_rate": 4.2781751742739885e-05,
"loss": 4.2458,
"step": 1600
},
{
"epoch": 3.0357142857142856,
"grad_norm": 16300.3486328125,
"learning_rate": 4.1713631790182364e-05,
"loss": 4.2229,
"step": 1700
},
{
"epoch": 3.2142857142857144,
"grad_norm": 16482.568359375,
"learning_rate": 4.058724504646834e-05,
"loss": 4.1953,
"step": 1800
},
{
"epoch": 3.392857142857143,
"grad_norm": 15220.8369140625,
"learning_rate": 3.9406518302733416e-05,
"loss": 4.1883,
"step": 1900
},
{
"epoch": 3.571428571428571,
"grad_norm": 14242.6416015625,
"learning_rate": 3.817556778933698e-05,
"loss": 4.1858,
"step": 2000
},
{
"epoch": 3.571428571428571,
"eval_cer_score": 0.9825155884420779,
"eval_loss": 4.381639003753662,
"eval_runtime": 6.4858,
"eval_samples_per_second": 38.546,
"eval_steps_per_second": 6.476,
"eval_wer_score": 1.0,
"step": 2000
},
{
"epoch": 3.75,
"grad_norm": 14843.748046875,
"learning_rate": 3.689868482592684e-05,
"loss": 4.1807,
"step": 2100
},
{
"epoch": 3.928571428571429,
"grad_norm": 29719.095703125,
"learning_rate": 3.5580320861110625e-05,
"loss": 4.1251,
"step": 2200
},
{
"epoch": 4.107142857142857,
"grad_norm": 17429.841796875,
"learning_rate": 3.4225071953887976e-05,
"loss": 4.031,
"step": 2300
},
{
"epoch": 4.285714285714286,
"grad_norm": 28763.1015625,
"learning_rate": 3.2837662750944535e-05,
"loss": 4.0189,
"step": 2400
},
{
"epoch": 4.464285714285714,
"grad_norm": 16039.8037109375,
"learning_rate": 3.1422930015665484e-05,
"loss": 4.0042,
"step": 2500
},
{
"epoch": 4.642857142857143,
"grad_norm": 27426.4765625,
"learning_rate": 2.9985805766289817e-05,
"loss": 3.9906,
"step": 2600
},
{
"epoch": 4.821428571428571,
"grad_norm": 17122.708984375,
"learning_rate": 2.853130008198855e-05,
"loss": 3.9787,
"step": 2700
},
{
"epoch": 5.0,
"grad_norm": 25625.185546875,
"learning_rate": 2.7064483636808313e-05,
"loss": 3.9773,
"step": 2800
},
{
"epoch": 5.178571428571429,
"grad_norm": 14854.916015625,
"learning_rate": 2.559047002236995e-05,
"loss": 3.9664,
"step": 2900
},
{
"epoch": 5.357142857142857,
"grad_norm": 22451.359375,
"learning_rate": 2.4114397920948657e-05,
"loss": 3.9619,
"step": 3000
},
{
"epoch": 5.357142857142857,
"eval_cer_score": 0.9613026290008414,
"eval_loss": 4.328557968139648,
"eval_runtime": 6.6271,
"eval_samples_per_second": 37.724,
"eval_steps_per_second": 6.338,
"eval_wer_score": 1.437584410573027,
"step": 3000
},
{
"epoch": 5.535714285714286,
"grad_norm": 14314.1396484375,
"learning_rate": 2.2641413191083445e-05,
"loss": 3.9524,
"step": 3100
},
{
"epoch": 5.714285714285714,
"grad_norm": 21002.537109375,
"learning_rate": 2.117665092816885e-05,
"loss": 3.9353,
"step": 3200
},
{
"epoch": 5.892857142857143,
"grad_norm": 12572.75,
"learning_rate": 1.9725217562568948e-05,
"loss": 3.9404,
"step": 3300
},
{
"epoch": 6.071428571428571,
"grad_norm": 18614.310546875,
"learning_rate": 1.829217305766289e-05,
"loss": 3.9334,
"step": 3400
},
{
"epoch": 6.25,
"grad_norm": 11758.970703125,
"learning_rate": 1.6882513269882917e-05,
"loss": 3.9161,
"step": 3500
},
{
"epoch": 6.428571428571429,
"grad_norm": 16748.3984375,
"learning_rate": 1.5501152532241005e-05,
"loss": 3.9079,
"step": 3600
},
{
"epoch": 6.607142857142857,
"grad_norm": 9738.3134765625,
"learning_rate": 1.4152906522061048e-05,
"loss": 3.9186,
"step": 3700
},
{
"epoch": 6.785714285714286,
"grad_norm": 15285.71875,
"learning_rate": 1.2842475472642968e-05,
"loss": 3.9058,
"step": 3800
},
{
"epoch": 6.964285714285714,
"grad_norm": 8818.5263671875,
"learning_rate": 1.1574427787385852e-05,
"loss": 3.9063,
"step": 3900
},
{
"epoch": 7.142857142857143,
"grad_norm": 12031.4169921875,
"learning_rate": 1.0353184113494386e-05,
"loss": 3.8988,
"step": 4000
},
{
"epoch": 7.142857142857143,
"eval_cer_score": 0.9185309406083239,
"eval_loss": 4.195689678192139,
"eval_runtime": 6.7441,
"eval_samples_per_second": 37.069,
"eval_steps_per_second": 6.228,
"eval_wer_score": 1.2407871888867452,
"step": 4000
},
{
"epoch": 7.321428571428571,
"grad_norm": 7690.8115234375,
"learning_rate": 9.183001930790483e-06,
"loss": 3.9009,
"step": 4100
},
{
"epoch": 7.5,
"grad_norm": 9677.2919921875,
"learning_rate": 8.067960709356478e-06,
"loss": 3.8754,
"step": 4200
},
{
"epoch": 7.678571428571429,
"grad_norm": 6019.1201171875,
"learning_rate": 7.011947687752804e-06,
"loss": 3.884,
"step": 4300
},
{
"epoch": 7.857142857142857,
"grad_norm": 7279.31005859375,
"learning_rate": 6.018644321390288e-06,
"loss": 3.8809,
"step": 4400
},
{
"epoch": 8.035714285714286,
"grad_norm": 4656.92138671875,
"learning_rate": 5.091513448300142e-06,
"loss": 3.8814,
"step": 4500
},
{
"epoch": 8.214285714285714,
"grad_norm": 4017.18896484375,
"learning_rate": 4.23378721704443e-06,
"loss": 3.8856,
"step": 4600
},
{
"epoch": 8.392857142857142,
"grad_norm": 4232.76953125,
"learning_rate": 3.448455818852267e-06,
"loss": 3.8654,
"step": 4700
},
{
"epoch": 8.571428571428571,
"grad_norm": 2418.802490234375,
"learning_rate": 2.7382570632638854e-06,
"loss": 3.8729,
"step": 4800
},
{
"epoch": 8.75,
"grad_norm": 69.66502380371094,
"learning_rate": 2.1056668336235622e-06,
"loss": 3.8814,
"step": 4900
},
{
"epoch": 8.928571428571429,
"grad_norm": 919.9892578125,
"learning_rate": 1.552890455695369e-06,
"loss": 3.8713,
"step": 5000
},
{
"epoch": 8.928571428571429,
"eval_cer_score": 0.9372601224024065,
"eval_loss": 4.1322102546691895,
"eval_runtime": 6.5532,
"eval_samples_per_second": 38.149,
"eval_steps_per_second": 6.409,
"eval_wer_score": 1.0818059039166505,
"step": 5000
},
{
"epoch": 9.107142857142858,
"grad_norm": 866.0170288085938,
"learning_rate": 1.081855009492383e-06,
"loss": 3.8652,
"step": 5100
},
{
"epoch": 9.285714285714286,
"grad_norm": 1821.4014892578125,
"learning_rate": 6.942026111217359e-07,
"loss": 3.8662,
"step": 5200
},
{
"epoch": 9.464285714285714,
"grad_norm": 121.43854522705078,
"learning_rate": 3.9128468806614306e-07,
"loss": 3.8734,
"step": 5300
},
{
"epoch": 9.642857142857142,
"grad_norm": 804.0866088867188,
"learning_rate": 1.7415726785939834e-07,
"loss": 3.8641,
"step": 5400
},
{
"epoch": 9.821428571428571,
"grad_norm": 238.31356811523438,
"learning_rate": 4.357729658039378e-08,
"loss": 3.8754,
"step": 5500
},
{
"epoch": 10.0,
"grad_norm": 12.470285415649414,
"learning_rate": 0.0,
"loss": 3.8738,
"step": 5600
}
],
"logging_steps": 100,
"max_steps": 5600,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.4758827926528e+16,
"train_batch_size": 224,
"trial_name": null,
"trial_params": null
}