{
"best_global_step": 750,
"best_metric": 23.863094670607303,
"best_model_checkpoint": "./whisper-small-dv/checkpoint-500",
"epoch": 7.853403141361256,
"eval_steps": 250,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13089005235602094,
"grad_norm": 10.698588371276855,
"learning_rate": 4.800000000000001e-06,
"loss": 3.1116,
"step": 25
},
{
"epoch": 0.2617801047120419,
"grad_norm": 9.169360160827637,
"learning_rate": 9.800000000000001e-06,
"loss": 1.6439,
"step": 50
},
{
"epoch": 0.39267015706806285,
"grad_norm": 9.506369590759277,
"learning_rate": 1e-05,
"loss": 1.1823,
"step": 75
},
{
"epoch": 0.5235602094240838,
"grad_norm": 8.656845092773438,
"learning_rate": 1e-05,
"loss": 0.9809,
"step": 100
},
{
"epoch": 0.6544502617801047,
"grad_norm": 7.963810443878174,
"learning_rate": 1e-05,
"loss": 0.8699,
"step": 125
},
{
"epoch": 0.7853403141361257,
"grad_norm": 8.005380630493164,
"learning_rate": 1e-05,
"loss": 0.7183,
"step": 150
},
{
"epoch": 0.9162303664921466,
"grad_norm": 6.437658309936523,
"learning_rate": 1e-05,
"loss": 0.5152,
"step": 175
},
{
"epoch": 1.0471204188481675,
"grad_norm": 4.852717399597168,
"learning_rate": 1e-05,
"loss": 0.4517,
"step": 200
},
{
"epoch": 1.1780104712041886,
"grad_norm": 4.497104167938232,
"learning_rate": 1e-05,
"loss": 0.3607,
"step": 225
},
{
"epoch": 1.3089005235602094,
"grad_norm": 5.882933139801025,
"learning_rate": 1e-05,
"loss": 0.3435,
"step": 250
},
{
"epoch": 1.3089005235602094,
"eval_loss": 0.4582085609436035,
"eval_runtime": 111.6293,
"eval_samples_per_second": 7.597,
"eval_steps_per_second": 0.475,
"eval_wer": 29.89798836876728,
"eval_wer_ortho": 36.224586737602124,
"step": 250
},
{
"epoch": 1.4397905759162304,
"grad_norm": 5.737706184387207,
"learning_rate": 1e-05,
"loss": 0.338,
"step": 275
},
{
"epoch": 1.5706806282722514,
"grad_norm": 4.902078151702881,
"learning_rate": 1e-05,
"loss": 0.3241,
"step": 300
},
{
"epoch": 1.7015706806282722,
"grad_norm": 6.897104263305664,
"learning_rate": 1e-05,
"loss": 0.3376,
"step": 325
},
{
"epoch": 1.8324607329842932,
"grad_norm": 5.536213397979736,
"learning_rate": 1e-05,
"loss": 0.3219,
"step": 350
},
{
"epoch": 1.9633507853403143,
"grad_norm": 4.42805290222168,
"learning_rate": 1e-05,
"loss": 0.304,
"step": 375
},
{
"epoch": 2.094240837696335,
"grad_norm": 3.3683159351348877,
"learning_rate": 1e-05,
"loss": 0.2182,
"step": 400
},
{
"epoch": 2.225130890052356,
"grad_norm": 3.863335609436035,
"learning_rate": 1e-05,
"loss": 0.1761,
"step": 425
},
{
"epoch": 2.356020942408377,
"grad_norm": 3.0521528720855713,
"learning_rate": 1e-05,
"loss": 0.2013,
"step": 450
},
{
"epoch": 2.486910994764398,
"grad_norm": 3.18312931060791,
"learning_rate": 1e-05,
"loss": 0.1766,
"step": 475
},
{
"epoch": 2.6178010471204187,
"grad_norm": 4.015468597412109,
"learning_rate": 1e-05,
"loss": 0.1883,
"step": 500
},
{
"epoch": 2.6178010471204187,
"eval_loss": 0.40125927329063416,
"eval_runtime": 113.2585,
"eval_samples_per_second": 7.487,
"eval_steps_per_second": 0.468,
"eval_wer": 25.49337401086853,
"eval_wer_ortho": 31.702451073532206,
"step": 500
},
{
"epoch": 2.7486910994764395,
"grad_norm": 4.0088043212890625,
"learning_rate": 1e-05,
"loss": 0.1923,
"step": 525
},
{
"epoch": 2.8795811518324608,
"grad_norm": 5.095918655395508,
"learning_rate": 1e-05,
"loss": 0.1753,
"step": 550
},
{
"epoch": 3.0104712041884816,
"grad_norm": 2.5282716751098633,
"learning_rate": 1e-05,
"loss": 0.1686,
"step": 575
},
{
"epoch": 3.141361256544503,
"grad_norm": 3.1651320457458496,
"learning_rate": 1e-05,
"loss": 0.1041,
"step": 600
},
{
"epoch": 3.2722513089005236,
"grad_norm": 3.057821750640869,
"learning_rate": 1e-05,
"loss": 0.0916,
"step": 625
},
{
"epoch": 3.4031413612565444,
"grad_norm": 2.9415502548217773,
"learning_rate": 1e-05,
"loss": 0.1017,
"step": 650
},
{
"epoch": 3.5340314136125652,
"grad_norm": 3.2176668643951416,
"learning_rate": 1e-05,
"loss": 0.0951,
"step": 675
},
{
"epoch": 3.6649214659685865,
"grad_norm": 3.7452964782714844,
"learning_rate": 1e-05,
"loss": 0.1,
"step": 700
},
{
"epoch": 3.7958115183246073,
"grad_norm": 3.433546543121338,
"learning_rate": 1e-05,
"loss": 0.0931,
"step": 725
},
{
"epoch": 3.9267015706806285,
"grad_norm": 3.0128612518310547,
"learning_rate": 1e-05,
"loss": 0.0977,
"step": 750
},
{
"epoch": 3.9267015706806285,
"eval_loss": 0.40142181515693665,
"eval_runtime": 111.1949,
"eval_samples_per_second": 7.626,
"eval_steps_per_second": 0.477,
"eval_wer": 23.863094670607303,
"eval_wer_ortho": 30.543416302489074,
"step": 750
},
{
"epoch": 4.057591623036649,
"grad_norm": 1.552932620048523,
"learning_rate": 1e-05,
"loss": 0.0771,
"step": 775
},
{
"epoch": 4.18848167539267,
"grad_norm": 2.078547954559326,
"learning_rate": 1e-05,
"loss": 0.0504,
"step": 800
},
{
"epoch": 4.319371727748691,
"grad_norm": 1.9236699342727661,
"learning_rate": 1e-05,
"loss": 0.06,
"step": 825
},
{
"epoch": 4.450261780104712,
"grad_norm": 2.1147005558013916,
"learning_rate": 1e-05,
"loss": 0.0487,
"step": 850
},
{
"epoch": 4.581151832460733,
"grad_norm": 2.60475492477417,
"learning_rate": 1e-05,
"loss": 0.0503,
"step": 875
},
{
"epoch": 4.712041884816754,
"grad_norm": 1.980432391166687,
"learning_rate": 1e-05,
"loss": 0.0477,
"step": 900
},
{
"epoch": 4.842931937172775,
"grad_norm": 2.8968000411987305,
"learning_rate": 1e-05,
"loss": 0.0542,
"step": 925
},
{
"epoch": 4.973821989528796,
"grad_norm": 3.0612237453460693,
"learning_rate": 1e-05,
"loss": 0.0544,
"step": 950
},
{
"epoch": 5.104712041884817,
"grad_norm": 1.3939883708953857,
"learning_rate": 1e-05,
"loss": 0.0381,
"step": 975
},
{
"epoch": 5.2356020942408374,
"grad_norm": 4.066138744354248,
"learning_rate": 1e-05,
"loss": 0.0267,
"step": 1000
},
{
"epoch": 5.2356020942408374,
"eval_loss": 0.45016494393348694,
"eval_runtime": 112.8165,
"eval_samples_per_second": 7.517,
"eval_steps_per_second": 0.47,
"eval_wer": 24.57812946896749,
"eval_wer_ortho": 31.008930267908035,
"step": 1000
},
{
"epoch": 5.366492146596858,
"grad_norm": 2.1948580741882324,
"learning_rate": 1e-05,
"loss": 0.0271,
"step": 1025
},
{
"epoch": 5.49738219895288,
"grad_norm": 1.8259961605072021,
"learning_rate": 1e-05,
"loss": 0.0304,
"step": 1050
},
{
"epoch": 5.628272251308901,
"grad_norm": 2.1298553943634033,
"learning_rate": 1e-05,
"loss": 0.0327,
"step": 1075
},
{
"epoch": 5.7591623036649215,
"grad_norm": 1.3052338361740112,
"learning_rate": 1e-05,
"loss": 0.03,
"step": 1100
},
{
"epoch": 5.890052356020942,
"grad_norm": 2.323640823364258,
"learning_rate": 1e-05,
"loss": 0.0297,
"step": 1125
},
{
"epoch": 6.020942408376963,
"grad_norm": 0.8321124315261841,
"learning_rate": 1e-05,
"loss": 0.0237,
"step": 1150
},
{
"epoch": 6.151832460732984,
"grad_norm": 1.0479042530059814,
"learning_rate": 1e-05,
"loss": 0.0161,
"step": 1175
},
{
"epoch": 6.282722513089006,
"grad_norm": 0.9512850642204285,
"learning_rate": 1e-05,
"loss": 0.0171,
"step": 1200
},
{
"epoch": 6.4136125654450264,
"grad_norm": 1.3394098281860352,
"learning_rate": 1e-05,
"loss": 0.018,
"step": 1225
},
{
"epoch": 6.544502617801047,
"grad_norm": 1.1322811841964722,
"learning_rate": 1e-05,
"loss": 0.0179,
"step": 1250
},
{
"epoch": 6.544502617801047,
"eval_loss": 0.46418532729148865,
"eval_runtime": 114.086,
"eval_samples_per_second": 7.433,
"eval_steps_per_second": 0.465,
"eval_wer": 24.149108589951375,
"eval_wer_ortho": 30.277408322249666,
"step": 1250
},
{
"epoch": 6.675392670157068,
"grad_norm": 0.8465819954872131,
"learning_rate": 1e-05,
"loss": 0.0195,
"step": 1275
},
{
"epoch": 6.806282722513089,
"grad_norm": 2.5051374435424805,
"learning_rate": 1e-05,
"loss": 0.0198,
"step": 1300
},
{
"epoch": 6.93717277486911,
"grad_norm": 2.2962050437927246,
"learning_rate": 1e-05,
"loss": 0.0175,
"step": 1325
},
{
"epoch": 7.0680628272251305,
"grad_norm": 1.9513806104660034,
"learning_rate": 1e-05,
"loss": 0.0171,
"step": 1350
},
{
"epoch": 7.198952879581152,
"grad_norm": 0.6999716758728027,
"learning_rate": 1e-05,
"loss": 0.0139,
"step": 1375
},
{
"epoch": 7.329842931937173,
"grad_norm": 1.1750918626785278,
"learning_rate": 1e-05,
"loss": 0.011,
"step": 1400
},
{
"epoch": 7.460732984293194,
"grad_norm": 1.7177971601486206,
"learning_rate": 1e-05,
"loss": 0.01,
"step": 1425
},
{
"epoch": 7.591623036649215,
"grad_norm": 2.9293274879455566,
"learning_rate": 1e-05,
"loss": 0.0127,
"step": 1450
},
{
"epoch": 7.722513089005235,
"grad_norm": 0.5329703092575073,
"learning_rate": 1e-05,
"loss": 0.0107,
"step": 1475
},
{
"epoch": 7.853403141361256,
"grad_norm": 2.054241418838501,
"learning_rate": 1e-05,
"loss": 0.0112,
"step": 1500
},
{
"epoch": 7.853403141361256,
"eval_loss": 0.4861427843570709,
"eval_runtime": 113.9681,
"eval_samples_per_second": 7.441,
"eval_steps_per_second": 0.465,
"eval_wer": 24.358852130803697,
"eval_wer_ortho": 30.372411172335166,
"step": 1500
}
],
"logging_steps": 25,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 11,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.91190892306432e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}