[
{
"loss": 7.8454,
"grad_norm": 37.07211685180664,
"learning_rate": 9.800000000000001e-07,
"epoch": 0.061092019854906456,
"step": 50
},
{
"loss": 3.9827,
"grad_norm": 20.092954635620117,
"learning_rate": 1.98e-06,
"epoch": 0.12218403970981291,
"step": 100
},
{
"loss": 2.3931,
"grad_norm": 17.73457908630371,
"learning_rate": 2.9800000000000003e-06,
"epoch": 0.18327605956471935,
"step": 150
},
{
"loss": 1.8963,
"grad_norm": 20.85625457763672,
"learning_rate": 3.980000000000001e-06,
"epoch": 0.24436807941962582,
"step": 200
},
{
"loss": 1.4987,
"grad_norm": 16.645475387573242,
"learning_rate": 4.980000000000001e-06,
"epoch": 0.30546009927453227,
"step": 250
},
{
"loss": 0.6793,
"grad_norm": 5.7492899894714355,
"learning_rate": 5.98e-06,
"epoch": 0.3665521191294387,
"step": 300
},
{
"loss": 0.4645,
"grad_norm": 6.880336284637451,
"learning_rate": 6.98e-06,
"epoch": 0.42764413898434517,
"step": 350
},
{
"loss": 0.4268,
"grad_norm": 5.579587936401367,
"learning_rate": 7.980000000000002e-06,
"epoch": 0.48873615883925164,
"step": 400
},
{
"loss": 0.3482,
"grad_norm": 4.888024806976318,
"learning_rate": 8.98e-06,
"epoch": 0.5498281786941581,
"step": 450
},
{
"loss": 0.3344,
"grad_norm": 5.0392961502075195,
"learning_rate": 9.980000000000001e-06,
"epoch": 0.6109201985490645,
"step": 500
},
{
"eval_loss": 0.34320297837257385,
"eval_lev": 0.11722440908020909,
"eval_runtime": 762.646,
"eval_samples_per_second": 7.631,
"eval_steps_per_second": 1.908,
"epoch": 0.6109201985490645,
"step": 500
},
{
"loss": 0.3161,
"grad_norm": 4.948662281036377,
"learning_rate": 9.863699582753825e-06,
"epoch": 0.672012218403971,
"step": 550
},
{
"loss": 0.3174,
"grad_norm": 6.04939079284668,
"learning_rate": 9.724617524339361e-06,
"epoch": 0.7331042382588774,
"step": 600
},
{
"loss": 0.3027,
"grad_norm": 4.422155380249023,
"learning_rate": 9.585535465924897e-06,
"epoch": 0.7941962581137839,
"step": 650
},
{
"loss": 0.2771,
"grad_norm": 3.8613669872283936,
"learning_rate": 9.446453407510432e-06,
"epoch": 0.8552882779686903,
"step": 700
},
{
"loss": 0.2778,
"grad_norm": 3.7996127605438232,
"learning_rate": 9.307371349095966e-06,
"epoch": 0.9163802978235968,
"step": 750
},
{
"loss": 0.2656,
"grad_norm": 4.138099670410156,
"learning_rate": 9.168289290681502e-06,
"epoch": 0.9774723176785033,
"step": 800
},
{
"loss": 0.2132,
"grad_norm": 3.6106605529785156,
"learning_rate": 9.029207232267038e-06,
"epoch": 1.037877052310042,
"step": 850
},
{
"loss": 0.2045,
"grad_norm": 4.340276718139648,
"learning_rate": 8.890125173852574e-06,
"epoch": 1.0989690721649485,
"step": 900
},
{
"loss": 0.1889,
"grad_norm": 3.8788809776306152,
"learning_rate": 8.75104311543811e-06,
"epoch": 1.1600610920198549,
"step": 950
},
{
"loss": 0.1977,
"grad_norm": 3.841637372970581,
"learning_rate": 8.611961057023645e-06,
"epoch": 1.2211531118747614,
"step": 1000
},
{
"eval_loss": 0.231288880109787,
"eval_lev": 0.1185600415981164,
"eval_runtime": 746.701,
"eval_samples_per_second": 7.794,
"eval_steps_per_second": 1.949,
"epoch": 1.2211531118747614,
"step": 1000
},
{
"loss": 0.1913,
"grad_norm": 3.2860212326049805,
"learning_rate": 8.47287899860918e-06,
"epoch": 1.2822451317296677,
"step": 1050
},
{
"loss": 0.1623,
"grad_norm": 3.720313549041748,
"learning_rate": 8.333796940194717e-06,
"epoch": 1.3433371515845742,
"step": 1100
},
{
"loss": 0.1703,
"grad_norm": 2.716610908508301,
"learning_rate": 8.194714881780251e-06,
"epoch": 1.4044291714394808,
"step": 1150
},
{
"loss": 0.1848,
"grad_norm": 3.862579822540283,
"learning_rate": 8.055632823365787e-06,
"epoch": 1.4655211912943873,
"step": 1200
},
{
"loss": 0.1774,
"grad_norm": 4.334630012512207,
"learning_rate": 7.916550764951322e-06,
"epoch": 1.5266132111492936,
"step": 1250
},
{
"loss": 0.1658,
"grad_norm": 3.6040408611297607,
"learning_rate": 7.777468706536857e-06,
"epoch": 1.5877052310042,
"step": 1300
},
{
"loss": 0.1634,
"grad_norm": 3.428239345550537,
"learning_rate": 7.638386648122394e-06,
"epoch": 1.6487972508591064,
"step": 1350
},
{
"loss": 0.1547,
"grad_norm": 4.171792030334473,
"learning_rate": 7.499304589707929e-06,
"epoch": 1.709889270714013,
"step": 1400
},
{
"loss": 0.1807,
"grad_norm": 3.5489614009857178,
"learning_rate": 7.360222531293464e-06,
"epoch": 1.7709812905689195,
"step": 1450
},
{
"loss": 0.1684,
"grad_norm": 3.7410941123962402,
"learning_rate": 7.221140472878999e-06,
"epoch": 1.832073310423826,
"step": 1500
},
{
"eval_loss": 0.19100622832775116,
"eval_lev": 0.11059288646312225,
"eval_runtime": 743.8165,
"eval_samples_per_second": 7.825,
"eval_steps_per_second": 1.956,
"epoch": 1.832073310423826,
"step": 1500
},
{
"loss": 0.1629,
"grad_norm": 4.399950981140137,
"learning_rate": 7.0820584144645345e-06,
"epoch": 1.8931653302787324,
"step": 1550
},
{
"loss": 0.163,
"grad_norm": 4.052116870880127,
"learning_rate": 6.9429763560500694e-06,
"epoch": 1.9542573501336387,
"step": 1600
},
{
"loss": 0.1384,
"grad_norm": 2.347085952758789,
"learning_rate": 6.803894297635606e-06,
"epoch": 2.0146620847651775,
"step": 1650
},
{
"loss": 0.1002,
"grad_norm": 3.4055838584899902,
"learning_rate": 6.664812239221141e-06,
"epoch": 2.075754104620084,
"step": 1700
},
{
"loss": 0.098,
"grad_norm": 3.1648952960968018,
"learning_rate": 6.525730180806677e-06,
"epoch": 2.1368461244749906,
"step": 1750
},
{
"loss": 0.0914,
"grad_norm": 2.8259644508361816,
"learning_rate": 6.386648122392212e-06,
"epoch": 2.197938144329897,
"step": 1800
},
{
"loss": 0.0978,
"grad_norm": 2.8769214153289795,
"learning_rate": 6.247566063977747e-06,
"epoch": 2.259030164184803,
"step": 1850
},
{
"loss": 0.1013,
"grad_norm": 3.907423257827759,
"learning_rate": 6.108484005563283e-06,
"epoch": 2.3201221840397097,
"step": 1900
},
{
"loss": 0.0975,
"grad_norm": 3.2260284423828125,
"learning_rate": 5.969401947148818e-06,
"epoch": 2.3812142038946162,
"step": 1950
},
{
"loss": 0.0857,
"grad_norm": 3.05881929397583,
"learning_rate": 5.830319888734354e-06,
"epoch": 2.4423062237495228,
"step": 2000
},
{
"eval_loss": 0.1767120063304901,
"eval_lev": 0.10026850043947817,
"eval_runtime": 736.5694,
"eval_samples_per_second": 7.901,
"eval_steps_per_second": 1.975,
"epoch": 2.4423062237495228,
"step": 2000
},
{
"loss": 0.0925,
"grad_norm": 3.2194902896881104,
"learning_rate": 5.691237830319889e-06,
"epoch": 2.5033982436044293,
"step": 2050
},
{
"loss": 0.0982,
"grad_norm": 3.6402034759521484,
"learning_rate": 5.552155771905425e-06,
"epoch": 2.5644902634593354,
"step": 2100
},
{
"loss": 0.099,
"grad_norm": 3.429462432861328,
"learning_rate": 5.41307371349096e-06,
"epoch": 2.625582283314242,
"step": 2150
},
{
"loss": 0.0988,
"grad_norm": 2.946981906890869,
"learning_rate": 5.273991655076496e-06,
"epoch": 2.6866743031691485,
"step": 2200
},
{
"loss": 0.1095,
"grad_norm": 2.541302442550659,
"learning_rate": 5.134909596662031e-06,
"epoch": 2.747766323024055,
"step": 2250
},
{
"loss": 0.0848,
"grad_norm": 2.9033353328704834,
"learning_rate": 4.995827538247566e-06,
"epoch": 2.8088583428789615,
"step": 2300
},
{
"loss": 0.0905,
"grad_norm": 3.764165163040161,
"learning_rate": 4.856745479833102e-06,
"epoch": 2.869950362733868,
"step": 2350
},
{
"loss": 0.0929,
"grad_norm": 2.633665084838867,
"learning_rate": 4.7176634214186375e-06,
"epoch": 2.9310423825887746,
"step": 2400
},
{
"loss": 0.0922,
"grad_norm": 5.649708271026611,
"learning_rate": 4.578581363004173e-06,
"epoch": 2.9921344024436807,
"step": 2450
},
{
"loss": 0.0598,
"grad_norm": 3.696406602859497,
"learning_rate": 4.439499304589708e-06,
"epoch": 3.0525391370752195,
"step": 2500
},
{
"eval_loss": 0.16838780045509338,
"eval_lev": 0.0839466685620889,
"eval_runtime": 752.8171,
"eval_samples_per_second": 7.731,
"eval_steps_per_second": 1.933,
"epoch": 3.0525391370752195,
"step": 2500
},
{
"loss": 0.0645,
"grad_norm": 1.515001654624939,
"learning_rate": 4.300417246175244e-06,
"epoch": 3.113631156930126,
"step": 2550
},
{
"loss": 0.0615,
"grad_norm": 1.3002523183822632,
"learning_rate": 4.16133518776078e-06,
"epoch": 3.1747231767850326,
"step": 2600
},
{
"loss": 0.0517,
"grad_norm": 3.700875997543335,
"learning_rate": 4.022253129346315e-06,
"epoch": 3.235815196639939,
"step": 2650
},
{
"loss": 0.0508,
"grad_norm": 2.631333827972412,
"learning_rate": 3.8831710709318496e-06,
"epoch": 3.296907216494845,
"step": 2700
},
{
"loss": 0.0475,
"grad_norm": 2.8750274181365967,
"learning_rate": 3.7440890125173858e-06,
"epoch": 3.3579992363497517,
"step": 2750
},
{
"loss": 0.0649,
"grad_norm": 2.2158966064453125,
"learning_rate": 3.605006954102921e-06,
"epoch": 3.4190912562046583,
"step": 2800
},
{
"loss": 0.0548,
"grad_norm": 2.202899694442749,
"learning_rate": 3.465924895688457e-06,
"epoch": 3.480183276059565,
"step": 2850
},
{
"loss": 0.0485,
"grad_norm": 3.5741360187530518,
"learning_rate": 3.3268428372739918e-06,
"epoch": 3.5412752959144713,
"step": 2900
},
{
"loss": 0.0453,
"grad_norm": 2.79834246635437,
"learning_rate": 3.187760778859527e-06,
"epoch": 3.602367315769378,
"step": 2950
},
{
"loss": 0.0611,
"grad_norm": 1.3616344928741455,
"learning_rate": 3.048678720445063e-06,
"epoch": 3.663459335624284,
"step": 3000
},
{
"eval_loss": 0.1635599136352539,
"eval_lev": 0.08673565709996436,
"eval_runtime": 742.0161,
"eval_samples_per_second": 7.843,
"eval_steps_per_second": 1.961,
"epoch": 3.663459335624284,
"step": 3000
},
{
"loss": 0.0413,
"grad_norm": 2.2232677936553955,
"learning_rate": 2.9095966620305982e-06,
"epoch": 3.7245513554791905,
"step": 3050
},
{
"loss": 0.0531,
"grad_norm": 2.548612117767334,
"learning_rate": 2.770514603616134e-06,
"epoch": 3.785643375334097,
"step": 3100
},
{
"loss": 0.0531,
"grad_norm": 3.896488666534424,
"learning_rate": 2.6314325452016694e-06,
"epoch": 3.8467353951890035,
"step": 3150
},
{
"loss": 0.0557,
"grad_norm": 1.2170665264129639,
"learning_rate": 2.4923504867872047e-06,
"epoch": 3.9078274150439096,
"step": 3200
},
{
"loss": 0.0531,
"grad_norm": 1.4885497093200684,
"learning_rate": 2.35326842837274e-06,
"epoch": 3.968919434898816,
"step": 3250
},
{
"loss": 0.0429,
"grad_norm": 1.6385103464126587,
"learning_rate": 2.2141863699582754e-06,
"epoch": 4.029324169530355,
"step": 3300
},
{
"loss": 0.0309,
"grad_norm": 1.5109846591949463,
"learning_rate": 2.075104311543811e-06,
"epoch": 4.0904161893852615,
"step": 3350
},
{
"loss": 0.0413,
"grad_norm": 1.5601056814193726,
"learning_rate": 1.9360222531293465e-06,
"epoch": 4.151508209240168,
"step": 3400
},
{
"loss": 0.0269,
"grad_norm": 1.4139606952667236,
"learning_rate": 1.7969401947148818e-06,
"epoch": 4.212600229095075,
"step": 3450
},
{
"loss": 0.0363,
"grad_norm": 1.725199818611145,
"learning_rate": 1.6578581363004174e-06,
"epoch": 4.273692248949981,
"step": 3500
},
{
"eval_loss": 0.16628701984882355,
"eval_lev": 0.06235171194318398,
"eval_runtime": 748.3873,
"eval_samples_per_second": 7.777,
"eval_steps_per_second": 1.944,
"epoch": 4.273692248949981,
"step": 3500
},
{
"loss": 0.0247,
"grad_norm": 1.4835779666900635,
"learning_rate": 1.5187760778859527e-06,
"epoch": 4.334784268804888,
"step": 3550
},
{
"loss": 0.0291,
"grad_norm": 1.5756815671920776,
"learning_rate": 1.3796940194714883e-06,
"epoch": 4.395876288659794,
"step": 3600
},
{
"loss": 0.0247,
"grad_norm": 1.7207695245742798,
"learning_rate": 1.2406119610570236e-06,
"epoch": 4.4569683085147,
"step": 3650
},
{
"loss": 0.0341,
"grad_norm": 1.29410982131958,
"learning_rate": 1.1015299026425592e-06,
"epoch": 4.518060328369606,
"step": 3700
},
{
"loss": 0.0248,
"grad_norm": 2.7500200271606445,
"learning_rate": 9.624478442280945e-07,
"epoch": 4.579152348224513,
"step": 3750
},
{
"loss": 0.0301,
"grad_norm": 1.767777442932129,
"learning_rate": 8.233657858136301e-07,
"epoch": 4.640244368079419,
"step": 3800
},
{
"loss": 0.027,
"grad_norm": 1.7285575866699219,
"learning_rate": 6.842837273991656e-07,
"epoch": 4.701336387934326,
"step": 3850
},
{
"loss": 0.037,
"grad_norm": 1.4113770723342896,
"learning_rate": 5.45201668984701e-07,
"epoch": 4.7624284077892325,
"step": 3900
},
{
"loss": 0.0245,
"grad_norm": 1.7481070756912231,
"learning_rate": 4.061196105702365e-07,
"epoch": 4.823520427644139,
"step": 3950
},
{
"loss": 0.0281,
"grad_norm": 2.5071568489074707,
"learning_rate": 2.6703755215577195e-07,
"epoch": 4.8846124474990456,
"step": 4000
},
{
"eval_loss": 0.16462065279483795,
"eval_lev": 0.06492746862423085,
"eval_runtime": 747.1834,
"eval_samples_per_second": 7.789,
"eval_steps_per_second": 1.947,
"epoch": 4.8846124474990456,
"step": 4000
},
{
"loss": 0.0347,
"grad_norm": 1.3820921182632446,
"learning_rate": 1.2795549374130738e-07,
"epoch": 4.945704467353952,
"step": 4050
},
{
"train_runtime": 31790.5788,
"train_samples_per_second": 8.238,
"train_steps_per_second": 0.129,
"total_flos": 7.55790734168064e+19,
"train_loss": 0.33621121611496174,
"epoch": 5.0,
"step": 4095
}
]