ikema-asr-indomain / trainer_state.json
ctaguchi's picture
Model save
331124b verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 98.61538461538461,
"eval_steps": 100,
"global_step": 7100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.3916083916083917,
"grad_norm": 0.7105618119239807,
"learning_rate": 0.000285,
"loss": 11.1682,
"step": 100
},
{
"epoch": 1.3916083916083917,
"eval_cer": 0.9903044405662207,
"eval_loss": 3.855257987976074,
"eval_runtime": 6.2271,
"eval_samples_per_second": 45.768,
"eval_steps_per_second": 5.781,
"step": 100
},
{
"epoch": 2.7832167832167833,
"grad_norm": 1.5593620538711548,
"learning_rate": 0.0002959285714285714,
"loss": 3.9311,
"step": 200
},
{
"epoch": 2.7832167832167833,
"eval_cer": 0.9903044405662207,
"eval_loss": 3.8240556716918945,
"eval_runtime": 6.1912,
"eval_samples_per_second": 46.033,
"eval_steps_per_second": 5.815,
"step": 200
},
{
"epoch": 4.1678321678321675,
"grad_norm": 1.5898423194885254,
"learning_rate": 0.0002916428571428571,
"loss": 3.8623,
"step": 300
},
{
"epoch": 4.1678321678321675,
"eval_cer": 0.9903044405662207,
"eval_loss": 3.7760262489318848,
"eval_runtime": 6.1779,
"eval_samples_per_second": 46.132,
"eval_steps_per_second": 5.827,
"step": 300
},
{
"epoch": 5.559440559440559,
"grad_norm": 2.1614158153533936,
"learning_rate": 0.00028735714285714286,
"loss": 3.7693,
"step": 400
},
{
"epoch": 5.559440559440559,
"eval_cer": 0.9903044405662207,
"eval_loss": 3.668625831604004,
"eval_runtime": 6.1599,
"eval_samples_per_second": 46.267,
"eval_steps_per_second": 5.844,
"step": 400
},
{
"epoch": 6.951048951048951,
"grad_norm": 1.3560220003128052,
"learning_rate": 0.00028307142857142854,
"loss": 3.671,
"step": 500
},
{
"epoch": 6.951048951048951,
"eval_cer": 0.9893348846228427,
"eval_loss": 3.590017080307007,
"eval_runtime": 6.1396,
"eval_samples_per_second": 46.42,
"eval_steps_per_second": 5.864,
"step": 500
},
{
"epoch": 8.335664335664335,
"grad_norm": 1.1032301187515259,
"learning_rate": 0.00027878571428571427,
"loss": 3.5618,
"step": 600
},
{
"epoch": 8.335664335664335,
"eval_cer": 0.9713011440760132,
"eval_loss": 3.5168509483337402,
"eval_runtime": 6.166,
"eval_samples_per_second": 46.221,
"eval_steps_per_second": 5.838,
"step": 600
},
{
"epoch": 9.727272727272727,
"grad_norm": 1.4139310121536255,
"learning_rate": 0.0002745,
"loss": 3.4994,
"step": 700
},
{
"epoch": 9.727272727272727,
"eval_cer": 0.9699437657552841,
"eval_loss": 3.3551578521728516,
"eval_runtime": 6.1552,
"eval_samples_per_second": 46.302,
"eval_steps_per_second": 5.849,
"step": 700
},
{
"epoch": 11.111888111888112,
"grad_norm": 0.8385369777679443,
"learning_rate": 0.0002702142857142857,
"loss": 3.3323,
"step": 800
},
{
"epoch": 11.111888111888112,
"eval_cer": 0.954043048283886,
"eval_loss": 3.138484001159668,
"eval_runtime": 6.0631,
"eval_samples_per_second": 47.005,
"eval_steps_per_second": 5.938,
"step": 800
},
{
"epoch": 12.503496503496503,
"grad_norm": 3.5932364463806152,
"learning_rate": 0.0002659285714285714,
"loss": 3.163,
"step": 900
},
{
"epoch": 12.503496503496503,
"eval_cer": 0.9185573007562536,
"eval_loss": 2.9224491119384766,
"eval_runtime": 6.0879,
"eval_samples_per_second": 46.814,
"eval_steps_per_second": 5.913,
"step": 900
},
{
"epoch": 13.895104895104895,
"grad_norm": 1.8399698734283447,
"learning_rate": 0.0002616428571428571,
"loss": 2.7901,
"step": 1000
},
{
"epoch": 13.895104895104895,
"eval_cer": 0.7828194686833431,
"eval_loss": 2.180238723754883,
"eval_runtime": 6.0259,
"eval_samples_per_second": 47.296,
"eval_steps_per_second": 5.974,
"step": 1000
},
{
"epoch": 15.27972027972028,
"grad_norm": 1.625369668006897,
"learning_rate": 0.00025735714285714283,
"loss": 2.3425,
"step": 1100
},
{
"epoch": 15.27972027972028,
"eval_cer": 0.6528989722707,
"eval_loss": 1.8405641317367554,
"eval_runtime": 6.0991,
"eval_samples_per_second": 46.728,
"eval_steps_per_second": 5.903,
"step": 1100
},
{
"epoch": 16.67132867132867,
"grad_norm": 2.625293016433716,
"learning_rate": 0.0002530714285714285,
"loss": 2.0608,
"step": 1200
},
{
"epoch": 16.67132867132867,
"eval_cer": 0.6329261198371146,
"eval_loss": 1.6505399942398071,
"eval_runtime": 6.1263,
"eval_samples_per_second": 46.52,
"eval_steps_per_second": 5.876,
"step": 1200
},
{
"epoch": 18.055944055944057,
"grad_norm": 1.92220139503479,
"learning_rate": 0.00024878571428571425,
"loss": 1.8813,
"step": 1300
},
{
"epoch": 18.055944055944057,
"eval_cer": 0.5714562730269537,
"eval_loss": 1.4768792390823364,
"eval_runtime": 6.1089,
"eval_samples_per_second": 46.653,
"eval_steps_per_second": 5.893,
"step": 1300
},
{
"epoch": 19.447552447552447,
"grad_norm": 3.1366982460021973,
"learning_rate": 0.0002445,
"loss": 1.6705,
"step": 1400
},
{
"epoch": 19.447552447552447,
"eval_cer": 0.5580764010083382,
"eval_loss": 1.479285478591919,
"eval_runtime": 6.1149,
"eval_samples_per_second": 46.608,
"eval_steps_per_second": 5.887,
"step": 1400
},
{
"epoch": 20.83916083916084,
"grad_norm": 2.116931200027466,
"learning_rate": 0.0002402142857142857,
"loss": 1.558,
"step": 1500
},
{
"epoch": 20.83916083916084,
"eval_cer": 0.4969943765755284,
"eval_loss": 1.3079291582107544,
"eval_runtime": 6.0774,
"eval_samples_per_second": 46.895,
"eval_steps_per_second": 5.924,
"step": 1500
},
{
"epoch": 22.223776223776223,
"grad_norm": 4.369190692901611,
"learning_rate": 0.00023592857142857142,
"loss": 1.4213,
"step": 1600
},
{
"epoch": 22.223776223776223,
"eval_cer": 0.49466744231142135,
"eval_loss": 1.3551599979400635,
"eval_runtime": 6.1085,
"eval_samples_per_second": 46.657,
"eval_steps_per_second": 5.893,
"step": 1600
},
{
"epoch": 23.615384615384617,
"grad_norm": 2.938127279281616,
"learning_rate": 0.00023164285714285713,
"loss": 1.3122,
"step": 1700
},
{
"epoch": 23.615384615384617,
"eval_cer": 0.4355245297653675,
"eval_loss": 1.236782431602478,
"eval_runtime": 6.1138,
"eval_samples_per_second": 46.616,
"eval_steps_per_second": 5.888,
"step": 1700
},
{
"epoch": 25.0,
"grad_norm": 4.156201362609863,
"learning_rate": 0.00022735714285714286,
"loss": 1.2303,
"step": 1800
},
{
"epoch": 25.0,
"eval_cer": 0.4347488850106651,
"eval_loss": 1.210758924484253,
"eval_runtime": 6.0871,
"eval_samples_per_second": 46.82,
"eval_steps_per_second": 5.914,
"step": 1800
},
{
"epoch": 26.39160839160839,
"grad_norm": 3.738050937652588,
"learning_rate": 0.00022307142857142854,
"loss": 1.1152,
"step": 1900
},
{
"epoch": 26.39160839160839,
"eval_cer": 0.4306767500484778,
"eval_loss": 1.2177391052246094,
"eval_runtime": 6.1061,
"eval_samples_per_second": 46.674,
"eval_steps_per_second": 5.896,
"step": 1900
},
{
"epoch": 27.783216783216783,
"grad_norm": 3.721745014190674,
"learning_rate": 0.00021878571428571428,
"loss": 1.0441,
"step": 2000
},
{
"epoch": 27.783216783216783,
"eval_cer": 0.4291254605390731,
"eval_loss": 1.3235960006713867,
"eval_runtime": 6.0708,
"eval_samples_per_second": 46.946,
"eval_steps_per_second": 5.93,
"step": 2000
},
{
"epoch": 29.167832167832167,
"grad_norm": 2.746555805206299,
"learning_rate": 0.00021449999999999998,
"loss": 0.9626,
"step": 2100
},
{
"epoch": 29.167832167832167,
"eval_cer": 0.4157455885204576,
"eval_loss": 1.2737609148025513,
"eval_runtime": 6.1093,
"eval_samples_per_second": 46.65,
"eval_steps_per_second": 5.893,
"step": 2100
},
{
"epoch": 30.55944055944056,
"grad_norm": 2.8237345218658447,
"learning_rate": 0.0002102142857142857,
"loss": 0.8987,
"step": 2200
},
{
"epoch": 30.55944055944056,
"eval_cer": 0.4190420787279426,
"eval_loss": 1.2683167457580566,
"eval_runtime": 6.1368,
"eval_samples_per_second": 46.441,
"eval_steps_per_second": 5.866,
"step": 2200
},
{
"epoch": 31.95104895104895,
"grad_norm": 5.561631679534912,
"learning_rate": 0.0002059285714285714,
"loss": 0.8367,
"step": 2300
},
{
"epoch": 31.95104895104895,
"eval_cer": 0.41438821019972855,
"eval_loss": 1.2570189237594604,
"eval_runtime": 6.095,
"eval_samples_per_second": 46.76,
"eval_steps_per_second": 5.906,
"step": 2300
},
{
"epoch": 33.33566433566433,
"grad_norm": 1.7682024240493774,
"learning_rate": 0.00020164285714285713,
"loss": 0.7617,
"step": 2400
},
{
"epoch": 33.33566433566433,
"eval_cer": 0.3876284661624976,
"eval_loss": 1.233074426651001,
"eval_runtime": 6.0871,
"eval_samples_per_second": 46.821,
"eval_steps_per_second": 5.914,
"step": 2400
},
{
"epoch": 34.72727272727273,
"grad_norm": 3.293351888656616,
"learning_rate": 0.00019735714285714284,
"loss": 0.7069,
"step": 2500
},
{
"epoch": 34.72727272727273,
"eval_cer": 0.40372309482257124,
"eval_loss": 1.328414797782898,
"eval_runtime": 6.0987,
"eval_samples_per_second": 46.731,
"eval_steps_per_second": 5.903,
"step": 2500
},
{
"epoch": 36.11188811188811,
"grad_norm": 1.8948358297348022,
"learning_rate": 0.00019307142857142854,
"loss": 0.6874,
"step": 2600
},
{
"epoch": 36.11188811188811,
"eval_cer": 0.38181113050223,
"eval_loss": 1.2947708368301392,
"eval_runtime": 6.0589,
"eval_samples_per_second": 47.038,
"eval_steps_per_second": 5.942,
"step": 2600
},
{
"epoch": 37.50349650349651,
"grad_norm": 2.3135173320770264,
"learning_rate": 0.00018878571428571428,
"loss": 0.6615,
"step": 2700
},
{
"epoch": 37.50349650349651,
"eval_cer": 0.3977118479736281,
"eval_loss": 1.299822211265564,
"eval_runtime": 6.0553,
"eval_samples_per_second": 47.066,
"eval_steps_per_second": 5.945,
"step": 2700
},
{
"epoch": 38.89510489510489,
"grad_norm": 8.07669448852539,
"learning_rate": 0.00018449999999999999,
"loss": 0.6086,
"step": 2800
},
{
"epoch": 38.89510489510489,
"eval_cer": 0.3757998836532868,
"eval_loss": 1.3369208574295044,
"eval_runtime": 6.0593,
"eval_samples_per_second": 47.035,
"eval_steps_per_second": 5.941,
"step": 2800
},
{
"epoch": 40.27972027972028,
"grad_norm": 2.3282470703125,
"learning_rate": 0.00018021428571428572,
"loss": 0.5804,
"step": 2900
},
{
"epoch": 40.27972027972028,
"eval_cer": 0.38375024238898586,
"eval_loss": 1.2814927101135254,
"eval_runtime": 6.0834,
"eval_samples_per_second": 46.849,
"eval_steps_per_second": 5.918,
"step": 2900
},
{
"epoch": 41.67132867132867,
"grad_norm": 5.158154010772705,
"learning_rate": 0.0001759285714285714,
"loss": 0.548,
"step": 3000
},
{
"epoch": 41.67132867132867,
"eval_cer": 0.37657552840798914,
"eval_loss": 1.3390411138534546,
"eval_runtime": 6.0871,
"eval_samples_per_second": 46.82,
"eval_steps_per_second": 5.914,
"step": 3000
},
{
"epoch": 43.05594405594405,
"grad_norm": 1.2800214290618896,
"learning_rate": 0.00017164285714285713,
"loss": 0.5239,
"step": 3100
},
{
"epoch": 43.05594405594405,
"eval_cer": 0.367267791351561,
"eval_loss": 1.257192850112915,
"eval_runtime": 6.0964,
"eval_samples_per_second": 46.749,
"eval_steps_per_second": 5.905,
"step": 3100
},
{
"epoch": 44.44755244755245,
"grad_norm": 4.9716010093688965,
"learning_rate": 0.00016735714285714284,
"loss": 0.4983,
"step": 3200
},
{
"epoch": 44.44755244755245,
"eval_cer": 0.3670738801628854,
"eval_loss": 1.295488715171814,
"eval_runtime": 6.034,
"eval_samples_per_second": 47.232,
"eval_steps_per_second": 5.966,
"step": 3200
},
{
"epoch": 45.83916083916084,
"grad_norm": 2.536774158477783,
"learning_rate": 0.0001631142857142857,
"loss": 0.4793,
"step": 3300
},
{
"epoch": 45.83916083916084,
"eval_cer": 0.372891215823153,
"eval_loss": 1.3562514781951904,
"eval_runtime": 6.0912,
"eval_samples_per_second": 46.789,
"eval_steps_per_second": 5.91,
"step": 3300
},
{
"epoch": 47.22377622377623,
"grad_norm": 3.6583845615386963,
"learning_rate": 0.00015882857142857142,
"loss": 0.438,
"step": 3400
},
{
"epoch": 47.22377622377623,
"eval_cer": 0.3915066899360093,
"eval_loss": 1.415280818939209,
"eval_runtime": 6.0618,
"eval_samples_per_second": 47.016,
"eval_steps_per_second": 5.939,
"step": 3400
},
{
"epoch": 48.61538461538461,
"grad_norm": 2.472052574157715,
"learning_rate": 0.00015454285714285712,
"loss": 0.4274,
"step": 3500
},
{
"epoch": 48.61538461538461,
"eval_cer": 0.36629823540818307,
"eval_loss": 1.319765567779541,
"eval_runtime": 6.1106,
"eval_samples_per_second": 46.64,
"eval_steps_per_second": 5.891,
"step": 3500
},
{
"epoch": 50.0,
"grad_norm": 7.336581707000732,
"learning_rate": 0.00015025714285714286,
"loss": 0.4064,
"step": 3600
},
{
"epoch": 50.0,
"eval_cer": 0.3814233081248788,
"eval_loss": 1.4350632429122925,
"eval_runtime": 6.098,
"eval_samples_per_second": 46.736,
"eval_steps_per_second": 5.904,
"step": 3600
},
{
"epoch": 51.39160839160839,
"grad_norm": 2.7026124000549316,
"learning_rate": 0.00014597142857142856,
"loss": 0.3812,
"step": 3700
},
{
"epoch": 51.39160839160839,
"eval_cer": 0.36203218925732017,
"eval_loss": 1.351439356803894,
"eval_runtime": 6.0572,
"eval_samples_per_second": 47.051,
"eval_steps_per_second": 5.943,
"step": 3700
},
{
"epoch": 52.78321678321678,
"grad_norm": 1.425048589706421,
"learning_rate": 0.00014168571428571427,
"loss": 0.3753,
"step": 3800
},
{
"epoch": 52.78321678321678,
"eval_cer": 0.3492340508047314,
"eval_loss": 1.3715204000473022,
"eval_runtime": 6.0504,
"eval_samples_per_second": 47.104,
"eval_steps_per_second": 5.95,
"step": 3800
},
{
"epoch": 54.16783216783217,
"grad_norm": 2.945066452026367,
"learning_rate": 0.0001374,
"loss": 0.3549,
"step": 3900
},
{
"epoch": 54.16783216783217,
"eval_cer": 0.36494085708745394,
"eval_loss": 1.4132966995239258,
"eval_runtime": 6.0971,
"eval_samples_per_second": 46.743,
"eval_steps_per_second": 5.904,
"step": 3900
},
{
"epoch": 55.55944055944056,
"grad_norm": 1.2087554931640625,
"learning_rate": 0.0001331142857142857,
"loss": 0.3262,
"step": 4000
},
{
"epoch": 55.55944055944056,
"eval_cer": 0.3573783207291061,
"eval_loss": 1.4259963035583496,
"eval_runtime": 6.0615,
"eval_samples_per_second": 47.018,
"eval_steps_per_second": 5.939,
"step": 4000
},
{
"epoch": 56.95104895104895,
"grad_norm": 6.813267230987549,
"learning_rate": 0.00012882857142857142,
"loss": 0.3296,
"step": 4100
},
{
"epoch": 56.95104895104895,
"eval_cer": 0.35524529765367463,
"eval_loss": 1.5134129524230957,
"eval_runtime": 6.0512,
"eval_samples_per_second": 47.098,
"eval_steps_per_second": 5.949,
"step": 4100
},
{
"epoch": 58.33566433566433,
"grad_norm": 1.582381248474121,
"learning_rate": 0.00012454285714285713,
"loss": 0.3136,
"step": 4200
},
{
"epoch": 58.33566433566433,
"eval_cer": 0.35873569904983515,
"eval_loss": 1.4695625305175781,
"eval_runtime": 6.0851,
"eval_samples_per_second": 46.836,
"eval_steps_per_second": 5.916,
"step": 4200
},
{
"epoch": 59.72727272727273,
"grad_norm": 0.9694690108299255,
"learning_rate": 0.00012025714285714285,
"loss": 0.3009,
"step": 4300
},
{
"epoch": 59.72727272727273,
"eval_cer": 0.3554392088423502,
"eval_loss": 1.432596206665039,
"eval_runtime": 6.0669,
"eval_samples_per_second": 46.976,
"eval_steps_per_second": 5.934,
"step": 4300
},
{
"epoch": 61.11188811188811,
"grad_norm": 1.6826478242874146,
"learning_rate": 0.00011597142857142855,
"loss": 0.2764,
"step": 4400
},
{
"epoch": 61.11188811188811,
"eval_cer": 0.35718440954043046,
"eval_loss": 1.4485613107681274,
"eval_runtime": 6.0638,
"eval_samples_per_second": 47.0,
"eval_steps_per_second": 5.937,
"step": 4400
},
{
"epoch": 62.50349650349651,
"grad_norm": 1.2600972652435303,
"learning_rate": 0.00011168571428571427,
"loss": 0.2738,
"step": 4500
},
{
"epoch": 62.50349650349651,
"eval_cer": 0.3593174326158619,
"eval_loss": 1.446284294128418,
"eval_runtime": 6.0798,
"eval_samples_per_second": 46.876,
"eval_steps_per_second": 5.921,
"step": 4500
},
{
"epoch": 63.89510489510489,
"grad_norm": 2.8400301933288574,
"learning_rate": 0.00010739999999999998,
"loss": 0.2574,
"step": 4600
},
{
"epoch": 63.89510489510489,
"eval_cer": 0.358347876672484,
"eval_loss": 1.4303468465805054,
"eval_runtime": 6.1117,
"eval_samples_per_second": 46.632,
"eval_steps_per_second": 5.89,
"step": 4600
},
{
"epoch": 65.27972027972028,
"grad_norm": 1.22808039188385,
"learning_rate": 0.0001031142857142857,
"loss": 0.2397,
"step": 4700
},
{
"epoch": 65.27972027972028,
"eval_cer": 0.3445801822765174,
"eval_loss": 1.4538311958312988,
"eval_runtime": 6.0831,
"eval_samples_per_second": 46.851,
"eval_steps_per_second": 5.918,
"step": 4700
},
{
"epoch": 66.67132867132867,
"grad_norm": 1.9458190202713013,
"learning_rate": 9.882857142857141e-05,
"loss": 0.2474,
"step": 4800
},
{
"epoch": 66.67132867132867,
"eval_cer": 0.3496218731820826,
"eval_loss": 1.4416499137878418,
"eval_runtime": 6.1066,
"eval_samples_per_second": 46.671,
"eval_steps_per_second": 5.895,
"step": 4800
},
{
"epoch": 68.05594405594405,
"grad_norm": 1.5397316217422485,
"learning_rate": 9.454285714285714e-05,
"loss": 0.2212,
"step": 4900
},
{
"epoch": 68.05594405594405,
"eval_cer": 0.34477409346519294,
"eval_loss": 1.476562738418579,
"eval_runtime": 6.125,
"eval_samples_per_second": 46.53,
"eval_steps_per_second": 5.878,
"step": 4900
},
{
"epoch": 69.44755244755245,
"grad_norm": 4.152817249298096,
"learning_rate": 9.025714285714285e-05,
"loss": 0.2173,
"step": 5000
},
{
"epoch": 69.44755244755245,
"eval_cer": 0.3496218731820826,
"eval_loss": 1.4784653186798096,
"eval_runtime": 6.0989,
"eval_samples_per_second": 46.73,
"eval_steps_per_second": 5.903,
"step": 5000
},
{
"epoch": 70.83916083916084,
"grad_norm": 1.8647962808609009,
"learning_rate": 8.597142857142857e-05,
"loss": 0.2138,
"step": 5100
},
{
"epoch": 70.83916083916084,
"eval_cer": 0.3581539654838084,
"eval_loss": 1.4859139919281006,
"eval_runtime": 6.0911,
"eval_samples_per_second": 46.79,
"eval_steps_per_second": 5.91,
"step": 5100
},
{
"epoch": 72.22377622377623,
"grad_norm": 6.688198089599609,
"learning_rate": 8.168571428571428e-05,
"loss": 0.2037,
"step": 5200
},
{
"epoch": 72.22377622377623,
"eval_cer": 0.3500096955594338,
"eval_loss": 1.5022231340408325,
"eval_runtime": 6.1063,
"eval_samples_per_second": 46.673,
"eval_steps_per_second": 5.896,
"step": 5200
},
{
"epoch": 73.61538461538461,
"grad_norm": 2.6784660816192627,
"learning_rate": 7.74e-05,
"loss": 0.194,
"step": 5300
},
{
"epoch": 73.61538461538461,
"eval_cer": 0.34904013961605584,
"eval_loss": 1.4964337348937988,
"eval_runtime": 6.0868,
"eval_samples_per_second": 46.823,
"eval_steps_per_second": 5.914,
"step": 5300
},
{
"epoch": 75.0,
"grad_norm": 2.1717565059661865,
"learning_rate": 7.31142857142857e-05,
"loss": 0.1758,
"step": 5400
},
{
"epoch": 75.0,
"eval_cer": 0.35524529765367463,
"eval_loss": 1.5645275115966797,
"eval_runtime": 6.0591,
"eval_samples_per_second": 47.037,
"eval_steps_per_second": 5.941,
"step": 5400
},
{
"epoch": 76.39160839160839,
"grad_norm": 3.3418147563934326,
"learning_rate": 6.882857142857142e-05,
"loss": 0.1693,
"step": 5500
},
{
"epoch": 76.39160839160839,
"eval_cer": 0.3492340508047314,
"eval_loss": 1.5214943885803223,
"eval_runtime": 6.0965,
"eval_samples_per_second": 46.748,
"eval_steps_per_second": 5.905,
"step": 5500
},
{
"epoch": 77.78321678321679,
"grad_norm": 1.1721317768096924,
"learning_rate": 6.454285714285713e-05,
"loss": 0.1682,
"step": 5600
},
{
"epoch": 77.78321678321679,
"eval_cer": 0.34361062633313944,
"eval_loss": 1.557164192199707,
"eval_runtime": 6.0786,
"eval_samples_per_second": 46.886,
"eval_steps_per_second": 5.922,
"step": 5600
},
{
"epoch": 79.16783216783217,
"grad_norm": 1.2348577976226807,
"learning_rate": 6.0257142857142846e-05,
"loss": 0.1616,
"step": 5700
},
{
"epoch": 79.16783216783217,
"eval_cer": 0.3461314717859221,
"eval_loss": 1.497072696685791,
"eval_runtime": 6.0713,
"eval_samples_per_second": 46.942,
"eval_steps_per_second": 5.93,
"step": 5700
},
{
"epoch": 80.55944055944056,
"grad_norm": 1.101247787475586,
"learning_rate": 5.597142857142857e-05,
"loss": 0.1625,
"step": 5800
},
{
"epoch": 80.55944055944056,
"eval_cer": 0.3515609850688385,
"eval_loss": 1.5326788425445557,
"eval_runtime": 6.0886,
"eval_samples_per_second": 46.809,
"eval_steps_per_second": 5.913,
"step": 5800
},
{
"epoch": 81.95104895104895,
"grad_norm": 4.953105449676514,
"learning_rate": 5.168571428571428e-05,
"loss": 0.1432,
"step": 5900
},
{
"epoch": 81.95104895104895,
"eval_cer": 0.35059142912546054,
"eval_loss": 1.5595433712005615,
"eval_runtime": 6.078,
"eval_samples_per_second": 46.89,
"eval_steps_per_second": 5.923,
"step": 5900
},
{
"epoch": 83.33566433566433,
"grad_norm": 1.2514948844909668,
"learning_rate": 4.7399999999999993e-05,
"loss": 0.1348,
"step": 6000
},
{
"epoch": 83.33566433566433,
"eval_cer": 0.3482644948613535,
"eval_loss": 1.556195616722107,
"eval_runtime": 6.0756,
"eval_samples_per_second": 46.909,
"eval_steps_per_second": 5.925,
"step": 6000
},
{
"epoch": 84.72727272727273,
"grad_norm": 0.5957698225975037,
"learning_rate": 4.311428571428571e-05,
"loss": 0.137,
"step": 6100
},
{
"epoch": 84.72727272727273,
"eval_cer": 0.3484584060500291,
"eval_loss": 1.590181827545166,
"eval_runtime": 6.0761,
"eval_samples_per_second": 46.905,
"eval_steps_per_second": 5.925,
"step": 6100
},
{
"epoch": 86.1118881118881,
"grad_norm": 2.8553950786590576,
"learning_rate": 3.882857142857142e-05,
"loss": 0.1263,
"step": 6200
},
{
"epoch": 86.1118881118881,
"eval_cer": 0.35214271863486524,
"eval_loss": 1.5852645635604858,
"eval_runtime": 6.1244,
"eval_samples_per_second": 46.535,
"eval_steps_per_second": 5.878,
"step": 6200
},
{
"epoch": 87.5034965034965,
"grad_norm": 0.7932090759277344,
"learning_rate": 3.454285714285714e-05,
"loss": 0.1271,
"step": 6300
},
{
"epoch": 87.5034965034965,
"eval_cer": 0.3488462284273803,
"eval_loss": 1.597654938697815,
"eval_runtime": 6.1436,
"eval_samples_per_second": 46.389,
"eval_steps_per_second": 5.86,
"step": 6300
},
{
"epoch": 88.8951048951049,
"grad_norm": 2.1244022846221924,
"learning_rate": 3.0257142857142855e-05,
"loss": 0.123,
"step": 6400
},
{
"epoch": 88.8951048951049,
"eval_cer": 0.3498157843707582,
"eval_loss": 1.6023805141448975,
"eval_runtime": 6.0927,
"eval_samples_per_second": 46.777,
"eval_steps_per_second": 5.909,
"step": 6400
},
{
"epoch": 90.27972027972028,
"grad_norm": 1.3532764911651611,
"learning_rate": 2.5971428571428572e-05,
"loss": 0.117,
"step": 6500
},
{
"epoch": 90.27972027972028,
"eval_cer": 0.3535000969555943,
"eval_loss": 1.6092747449874878,
"eval_runtime": 6.1301,
"eval_samples_per_second": 46.492,
"eval_steps_per_second": 5.873,
"step": 6500
},
{
"epoch": 91.67132867132867,
"grad_norm": 3.1814770698547363,
"learning_rate": 2.1685714285714286e-05,
"loss": 0.1077,
"step": 6600
},
{
"epoch": 91.67132867132867,
"eval_cer": 0.35194880744618967,
"eval_loss": 1.5807358026504517,
"eval_runtime": 6.0891,
"eval_samples_per_second": 46.805,
"eval_steps_per_second": 5.912,
"step": 6600
},
{
"epoch": 93.05594405594405,
"grad_norm": 1.0375070571899414,
"learning_rate": 1.74e-05,
"loss": 0.1072,
"step": 6700
},
{
"epoch": 93.05594405594405,
"eval_cer": 0.3476827612953267,
"eval_loss": 1.580068588256836,
"eval_runtime": 6.1035,
"eval_samples_per_second": 46.695,
"eval_steps_per_second": 5.898,
"step": 6700
},
{
"epoch": 94.44755244755245,
"grad_norm": 1.3363580703735352,
"learning_rate": 1.3114285714285713e-05,
"loss": 0.1063,
"step": 6800
},
{
"epoch": 94.44755244755245,
"eval_cer": 0.35020360674810935,
"eval_loss": 1.5893759727478027,
"eval_runtime": 6.0883,
"eval_samples_per_second": 46.811,
"eval_steps_per_second": 5.913,
"step": 6800
},
{
"epoch": 95.83916083916084,
"grad_norm": 2.259174108505249,
"learning_rate": 8.828571428571429e-06,
"loss": 0.103,
"step": 6900
},
{
"epoch": 95.83916083916084,
"eval_cer": 0.3498157843707582,
"eval_loss": 1.602723240852356,
"eval_runtime": 6.0922,
"eval_samples_per_second": 46.781,
"eval_steps_per_second": 5.909,
"step": 6900
},
{
"epoch": 97.22377622377623,
"grad_norm": 1.8573235273361206,
"learning_rate": 4.585714285714285e-06,
"loss": 0.1032,
"step": 7000
},
{
"epoch": 97.22377622377623,
"eval_cer": 0.3484584060500291,
"eval_loss": 1.6034408807754517,
"eval_runtime": 6.1003,
"eval_samples_per_second": 46.719,
"eval_steps_per_second": 5.901,
"step": 7000
},
{
"epoch": 98.61538461538461,
"grad_norm": 1.2302757501602173,
"learning_rate": 3e-07,
"loss": 0.0971,
"step": 7100
},
{
"epoch": 98.61538461538461,
"eval_cer": 0.3480705836726779,
"eval_loss": 1.6103968620300293,
"eval_runtime": 6.1158,
"eval_samples_per_second": 46.6,
"eval_steps_per_second": 5.886,
"step": 7100
},
{
"epoch": 98.61538461538461,
"step": 7100,
"total_flos": 1.5637471182402683e+19,
"train_loss": 1.063481583259475,
"train_runtime": 9766.9628,
"train_samples_per_second": 23.354,
"train_steps_per_second": 0.727
},
{
"epoch": 98.61538461538461,
"eval_cer": 0.35214271863486524,
"eval_loss": 1.6103789806365967,
"eval_runtime": 6.0538,
"eval_samples_per_second": 47.078,
"eval_steps_per_second": 5.947,
"step": 7100
}
],
"logging_steps": 100,
"max_steps": 7100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5637471182402683e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}