{
"best_metric": 30.297225891677677,
"best_model_checkpoint": "./whisper-small-lt/checkpoint-2000",
"epoch": 5.0275,
"eval_steps": 1000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0125,
"grad_norm": 1.7294858694076538,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0291,
"step": 25
},
{
"epoch": 0.025,
"grad_norm": 1.1127374172210693,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0178,
"step": 50
},
{
"epoch": 0.0375,
"grad_norm": 1.248637318611145,
"learning_rate": 1.5e-06,
"loss": 0.0202,
"step": 75
},
{
"epoch": 0.05,
"grad_norm": 0.8879645466804504,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0166,
"step": 100
},
{
"epoch": 0.0625,
"grad_norm": 1.0460960865020752,
"learning_rate": 2.5e-06,
"loss": 0.0148,
"step": 125
},
{
"epoch": 0.075,
"grad_norm": 0.7859419584274292,
"learning_rate": 3e-06,
"loss": 0.0138,
"step": 150
},
{
"epoch": 0.0875,
"grad_norm": 0.6411359906196594,
"learning_rate": 3.5e-06,
"loss": 0.0124,
"step": 175
},
{
"epoch": 0.1,
"grad_norm": 1.4224746227264404,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0157,
"step": 200
},
{
"epoch": 0.1125,
"grad_norm": 1.8457200527191162,
"learning_rate": 4.5e-06,
"loss": 0.0283,
"step": 225
},
{
"epoch": 0.125,
"grad_norm": 2.3054425716400146,
"learning_rate": 5e-06,
"loss": 0.0244,
"step": 250
},
{
"epoch": 0.1375,
"grad_norm": 2.961075782775879,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0226,
"step": 275
},
{
"epoch": 0.15,
"grad_norm": 1.1010462045669556,
"learning_rate": 6e-06,
"loss": 0.0158,
"step": 300
},
{
"epoch": 0.1625,
"grad_norm": 1.5947000980377197,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0185,
"step": 325
},
{
"epoch": 0.175,
"grad_norm": 2.136035203933716,
"learning_rate": 7e-06,
"loss": 0.0182,
"step": 350
},
{
"epoch": 0.1875,
"grad_norm": 1.8465747833251953,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0149,
"step": 375
},
{
"epoch": 1.0055,
"grad_norm": 2.093752384185791,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0277,
"step": 400
},
{
"epoch": 1.018,
"grad_norm": 1.3066338300704956,
"learning_rate": 8.5e-06,
"loss": 0.0236,
"step": 425
},
{
"epoch": 1.0305,
"grad_norm": 2.310856819152832,
"learning_rate": 9e-06,
"loss": 0.023,
"step": 450
},
{
"epoch": 1.043,
"grad_norm": 1.4942700862884521,
"learning_rate": 9.5e-06,
"loss": 0.022,
"step": 475
},
{
"epoch": 1.0555,
"grad_norm": 1.1907243728637695,
"learning_rate": 1e-05,
"loss": 0.0183,
"step": 500
},
{
"epoch": 1.068,
"grad_norm": 1.8676977157592773,
"learning_rate": 9.833333333333333e-06,
"loss": 0.0183,
"step": 525
},
{
"epoch": 1.0805,
"grad_norm": 0.9104222059249878,
"learning_rate": 9.666666666666667e-06,
"loss": 0.016,
"step": 550
},
{
"epoch": 1.093,
"grad_norm": 1.465457558631897,
"learning_rate": 9.5e-06,
"loss": 0.0151,
"step": 575
},
{
"epoch": 1.1055,
"grad_norm": 1.5364313125610352,
"learning_rate": 9.333333333333334e-06,
"loss": 0.0205,
"step": 600
},
{
"epoch": 1.1179999999999999,
"grad_norm": 1.29054594039917,
"learning_rate": 9.166666666666666e-06,
"loss": 0.0198,
"step": 625
},
{
"epoch": 1.1305,
"grad_norm": 2.222632884979248,
"learning_rate": 9e-06,
"loss": 0.0167,
"step": 650
},
{
"epoch": 1.143,
"grad_norm": 1.1213107109069824,
"learning_rate": 8.833333333333334e-06,
"loss": 0.015,
"step": 675
},
{
"epoch": 1.1555,
"grad_norm": 3.053809642791748,
"learning_rate": 8.666666666666668e-06,
"loss": 0.0123,
"step": 700
},
{
"epoch": 1.168,
"grad_norm": 3.0312676429748535,
"learning_rate": 8.5e-06,
"loss": 0.0135,
"step": 725
},
{
"epoch": 1.1804999999999999,
"grad_norm": 1.2940341234207153,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0152,
"step": 750
},
{
"epoch": 1.193,
"grad_norm": 1.9949597120285034,
"learning_rate": 8.166666666666668e-06,
"loss": 0.015,
"step": 775
},
{
"epoch": 2.011,
"grad_norm": 1.2895445823669434,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0251,
"step": 800
},
{
"epoch": 2.0235,
"grad_norm": 1.7078819274902344,
"learning_rate": 7.833333333333333e-06,
"loss": 0.0159,
"step": 825
},
{
"epoch": 2.036,
"grad_norm": 0.7963767647743225,
"learning_rate": 7.666666666666667e-06,
"loss": 0.0158,
"step": 850
},
{
"epoch": 2.0485,
"grad_norm": 1.2390975952148438,
"learning_rate": 7.500000000000001e-06,
"loss": 0.0141,
"step": 875
},
{
"epoch": 2.061,
"grad_norm": 1.4780830144882202,
"learning_rate": 7.333333333333333e-06,
"loss": 0.0112,
"step": 900
},
{
"epoch": 2.0735,
"grad_norm": 0.7993568778038025,
"learning_rate": 7.166666666666667e-06,
"loss": 0.0118,
"step": 925
},
{
"epoch": 2.086,
"grad_norm": 1.1336815357208252,
"learning_rate": 7e-06,
"loss": 0.0089,
"step": 950
},
{
"epoch": 2.0985,
"grad_norm": 1.7068537473678589,
"learning_rate": 6.833333333333334e-06,
"loss": 0.0107,
"step": 975
},
{
"epoch": 2.111,
"grad_norm": 1.4282974004745483,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0117,
"step": 1000
},
{
"epoch": 2.111,
"eval_loss": 0.15551722049713135,
"eval_runtime": 621.1845,
"eval_samples_per_second": 1.025,
"eval_steps_per_second": 0.129,
"eval_wer": 33.69881109643329,
"step": 1000
},
{
"epoch": 2.1235,
"grad_norm": 2.979623317718506,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.0111,
"step": 1025
},
{
"epoch": 2.136,
"grad_norm": 2.2827956676483154,
"learning_rate": 6.333333333333333e-06,
"loss": 0.0137,
"step": 1050
},
{
"epoch": 2.1485,
"grad_norm": 1.0386461019515991,
"learning_rate": 6.166666666666667e-06,
"loss": 0.0102,
"step": 1075
},
{
"epoch": 2.161,
"grad_norm": 1.0650137662887573,
"learning_rate": 6e-06,
"loss": 0.0104,
"step": 1100
},
{
"epoch": 2.1734999999999998,
"grad_norm": 1.1739250421524048,
"learning_rate": 5.833333333333334e-06,
"loss": 0.0115,
"step": 1125
},
{
"epoch": 2.186,
"grad_norm": 1.6481997966766357,
"learning_rate": 5.666666666666667e-06,
"loss": 0.0096,
"step": 1150
},
{
"epoch": 3.004,
"grad_norm": 1.7796248197555542,
"learning_rate": 5.500000000000001e-06,
"loss": 0.0166,
"step": 1175
},
{
"epoch": 3.0165,
"grad_norm": 1.6211556196212769,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0125,
"step": 1200
},
{
"epoch": 3.029,
"grad_norm": 1.1395519971847534,
"learning_rate": 5.1666666666666675e-06,
"loss": 0.008,
"step": 1225
},
{
"epoch": 3.0415,
"grad_norm": 0.41915813088417053,
"learning_rate": 5e-06,
"loss": 0.007,
"step": 1250
},
{
"epoch": 3.054,
"grad_norm": 2.371992588043213,
"learning_rate": 4.833333333333333e-06,
"loss": 0.0056,
"step": 1275
},
{
"epoch": 3.0665,
"grad_norm": 0.7025607824325562,
"learning_rate": 4.666666666666667e-06,
"loss": 0.0051,
"step": 1300
},
{
"epoch": 3.079,
"grad_norm": 0.7528577446937561,
"learning_rate": 4.5e-06,
"loss": 0.0044,
"step": 1325
},
{
"epoch": 3.0915,
"grad_norm": 0.7671001553535461,
"learning_rate": 4.333333333333334e-06,
"loss": 0.0035,
"step": 1350
},
{
"epoch": 3.104,
"grad_norm": 1.1365867853164673,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0053,
"step": 1375
},
{
"epoch": 3.1165,
"grad_norm": 0.31649962067604065,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0057,
"step": 1400
},
{
"epoch": 3.129,
"grad_norm": 0.866807758808136,
"learning_rate": 3.833333333333334e-06,
"loss": 0.0059,
"step": 1425
},
{
"epoch": 3.1415,
"grad_norm": 0.9588886499404907,
"learning_rate": 3.6666666666666666e-06,
"loss": 0.0061,
"step": 1450
},
{
"epoch": 3.154,
"grad_norm": 0.5269432663917542,
"learning_rate": 3.5e-06,
"loss": 0.0042,
"step": 1475
},
{
"epoch": 3.1665,
"grad_norm": 1.0368000268936157,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0044,
"step": 1500
},
{
"epoch": 3.179,
"grad_norm": 0.9669756889343262,
"learning_rate": 3.1666666666666667e-06,
"loss": 0.0051,
"step": 1525
},
{
"epoch": 3.1915,
"grad_norm": 1.1753712892532349,
"learning_rate": 3e-06,
"loss": 0.0037,
"step": 1550
},
{
"epoch": 4.0095,
"grad_norm": 0.7525417804718018,
"learning_rate": 2.8333333333333335e-06,
"loss": 0.0073,
"step": 1575
},
{
"epoch": 4.022,
"grad_norm": 0.36638399958610535,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0048,
"step": 1600
},
{
"epoch": 4.0345,
"grad_norm": 0.5773335099220276,
"learning_rate": 2.5e-06,
"loss": 0.0031,
"step": 1625
},
{
"epoch": 4.047,
"grad_norm": 0.44142794609069824,
"learning_rate": 2.3333333333333336e-06,
"loss": 0.0022,
"step": 1650
},
{
"epoch": 4.0595,
"grad_norm": 0.3425196409225464,
"learning_rate": 2.166666666666667e-06,
"loss": 0.0019,
"step": 1675
},
{
"epoch": 4.072,
"grad_norm": 0.23644089698791504,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0019,
"step": 1700
},
{
"epoch": 4.0845,
"grad_norm": 0.29089945554733276,
"learning_rate": 1.8333333333333333e-06,
"loss": 0.0013,
"step": 1725
},
{
"epoch": 4.097,
"grad_norm": 1.356613278388977,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0017,
"step": 1750
},
{
"epoch": 4.1095,
"grad_norm": 0.5859806537628174,
"learning_rate": 1.5e-06,
"loss": 0.0024,
"step": 1775
},
{
"epoch": 4.122,
"grad_norm": 0.4378674626350403,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0027,
"step": 1800
},
{
"epoch": 4.1345,
"grad_norm": 0.46167829632759094,
"learning_rate": 1.1666666666666668e-06,
"loss": 0.0025,
"step": 1825
},
{
"epoch": 4.147,
"grad_norm": 0.41351112723350525,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0024,
"step": 1850
},
{
"epoch": 4.1595,
"grad_norm": 0.35589855909347534,
"learning_rate": 8.333333333333333e-07,
"loss": 0.0019,
"step": 1875
},
{
"epoch": 4.172,
"grad_norm": 1.8024345636367798,
"learning_rate": 6.666666666666667e-07,
"loss": 0.0018,
"step": 1900
},
{
"epoch": 4.1845,
"grad_norm": 0.5813783407211304,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0013,
"step": 1925
},
{
"epoch": 5.0025,
"grad_norm": 0.9400327205657959,
"learning_rate": 3.3333333333333335e-07,
"loss": 0.0023,
"step": 1950
},
{
"epoch": 5.015,
"grad_norm": 0.6248894333839417,
"learning_rate": 1.6666666666666668e-07,
"loss": 0.0046,
"step": 1975
},
{
"epoch": 5.0275,
"grad_norm": 0.5471067428588867,
"learning_rate": 0.0,
"loss": 0.0022,
"step": 2000
},
{
"epoch": 5.0275,
"eval_loss": 0.1518355756998062,
"eval_runtime": 632.1111,
"eval_samples_per_second": 1.008,
"eval_steps_per_second": 0.127,
"eval_wer": 30.297225891677677,
"step": 2000
}
],
"logging_steps": 25,
"max_steps": 2000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.2217464672256e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}