{
"best_metric": 0.1309033051276488,
"best_model_checkpoint": "/data/schen/xlsr_teochew_model_no_punctuation_pinyin_ES/checkpoint-3500",
"epoch": 11.2,
"eval_steps": 500,
"global_step": 3500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"grad_norm": 1.2721055746078491,
"learning_rate": 2.9999999999999997e-05,
"loss": 0.1692,
"step": 50
},
{
"epoch": 0.32,
"grad_norm": 0.4928969442844391,
"learning_rate": 5.9999999999999995e-05,
"loss": 0.151,
"step": 100
},
{
"epoch": 0.48,
"grad_norm": 0.5016694068908691,
"learning_rate": 8.999999999999999e-05,
"loss": 0.1446,
"step": 150
},
{
"epoch": 0.64,
"grad_norm": 0.5080090165138245,
"learning_rate": 0.00011999999999999999,
"loss": 0.1408,
"step": 200
},
{
"epoch": 0.8,
"grad_norm": 1.2801826000213623,
"learning_rate": 0.0001494,
"loss": 0.1475,
"step": 250
},
{
"epoch": 0.96,
"grad_norm": 1.2742992639541626,
"learning_rate": 0.00017939999999999997,
"loss": 0.1384,
"step": 300
},
{
"epoch": 1.12,
"grad_norm": 0.6099441051483154,
"learning_rate": 0.00020939999999999997,
"loss": 0.1396,
"step": 350
},
{
"epoch": 1.28,
"grad_norm": 0.5118276476860046,
"learning_rate": 0.0002394,
"loss": 0.1394,
"step": 400
},
{
"epoch": 1.44,
"grad_norm": 0.40769001841545105,
"learning_rate": 0.0002694,
"loss": 0.1384,
"step": 450
},
{
"epoch": 1.6,
"grad_norm": 0.40534549951553345,
"learning_rate": 0.00029939999999999996,
"loss": 0.1496,
"step": 500
},
{
"epoch": 1.6,
"eval_loss": 0.1971094310283661,
"eval_runtime": 40.7703,
"eval_samples_per_second": 30.66,
"eval_steps_per_second": 3.851,
"eval_wer": 0.15227453532942792,
"step": 500
},
{
"epoch": 1.76,
"grad_norm": 0.2814898192882538,
"learning_rate": 0.000299046357615894,
"loss": 0.1536,
"step": 550
},
{
"epoch": 1.92,
"grad_norm": 1.256887674331665,
"learning_rate": 0.0002980529801324503,
"loss": 0.1602,
"step": 600
},
{
"epoch": 2.08,
"grad_norm": 0.2832282483577728,
"learning_rate": 0.0002970596026490066,
"loss": 0.1592,
"step": 650
},
{
"epoch": 2.24,
"grad_norm": 0.3951769769191742,
"learning_rate": 0.0002960662251655629,
"loss": 0.1563,
"step": 700
},
{
"epoch": 2.4,
"grad_norm": 0.4493905305862427,
"learning_rate": 0.00029507284768211917,
"loss": 0.1484,
"step": 750
},
{
"epoch": 2.56,
"grad_norm": 0.2806813716888428,
"learning_rate": 0.0002940794701986755,
"loss": 0.1627,
"step": 800
},
{
"epoch": 2.7199999999999998,
"grad_norm": 0.926182210445404,
"learning_rate": 0.0002930860927152318,
"loss": 0.1675,
"step": 850
},
{
"epoch": 2.88,
"grad_norm": 0.36387988924980164,
"learning_rate": 0.0002921125827814569,
"loss": 0.153,
"step": 900
},
{
"epoch": 3.04,
"grad_norm": 0.2582753002643585,
"learning_rate": 0.00029111920529801324,
"loss": 0.1675,
"step": 950
},
{
"epoch": 3.2,
"grad_norm": 0.30846667289733887,
"learning_rate": 0.0002901258278145695,
"loss": 0.1606,
"step": 1000
},
{
"epoch": 3.2,
"eval_loss": 0.19323976337909698,
"eval_runtime": 40.6846,
"eval_samples_per_second": 30.724,
"eval_steps_per_second": 3.859,
"eval_wer": 0.15527590945252043,
"step": 1000
},
{
"epoch": 3.36,
"grad_norm": 0.35464128851890564,
"learning_rate": 0.0002891324503311258,
"loss": 0.1546,
"step": 1050
},
{
"epoch": 3.52,
"grad_norm": 0.3184821307659149,
"learning_rate": 0.00028813907284768207,
"loss": 0.1532,
"step": 1100
},
{
"epoch": 3.68,
"grad_norm": 0.3197060227394104,
"learning_rate": 0.0002871456953642384,
"loss": 0.1505,
"step": 1150
},
{
"epoch": 3.84,
"grad_norm": 0.2760404050350189,
"learning_rate": 0.0002861721854304636,
"loss": 0.1623,
"step": 1200
},
{
"epoch": 4.0,
"grad_norm": 0.49457865953445435,
"learning_rate": 0.00028517880794701986,
"loss": 0.1666,
"step": 1250
},
{
"epoch": 4.16,
"grad_norm": 1.0381335020065308,
"learning_rate": 0.00028418543046357614,
"loss": 0.1409,
"step": 1300
},
{
"epoch": 4.32,
"grad_norm": 0.5828660130500793,
"learning_rate": 0.0002831920529801324,
"loss": 0.1432,
"step": 1350
},
{
"epoch": 4.48,
"grad_norm": 0.9824939966201782,
"learning_rate": 0.0002821986754966887,
"loss": 0.1447,
"step": 1400
},
{
"epoch": 4.64,
"grad_norm": 0.5372733473777771,
"learning_rate": 0.00028120529801324497,
"loss": 0.15,
"step": 1450
},
{
"epoch": 4.8,
"grad_norm": 0.5010742545127869,
"learning_rate": 0.0002802119205298013,
"loss": 0.1512,
"step": 1500
},
{
"epoch": 4.8,
"eval_loss": 0.19122301042079926,
"eval_runtime": 40.5484,
"eval_samples_per_second": 30.827,
"eval_steps_per_second": 3.872,
"eval_wer": 0.15097273450495408,
"step": 1500
},
{
"epoch": 4.96,
"grad_norm": 0.6968886852264404,
"learning_rate": 0.0002792185430463576,
"loss": 0.1567,
"step": 1550
},
{
"epoch": 5.12,
"grad_norm": 0.44052571058273315,
"learning_rate": 0.00027824503311258276,
"loss": 0.1469,
"step": 1600
},
{
"epoch": 5.28,
"grad_norm": 0.278921514749527,
"learning_rate": 0.00027727152317880794,
"loss": 0.1497,
"step": 1650
},
{
"epoch": 5.44,
"grad_norm": 0.380680650472641,
"learning_rate": 0.0002762781456953642,
"loss": 0.1449,
"step": 1700
},
{
"epoch": 5.6,
"grad_norm": 0.42826640605926514,
"learning_rate": 0.0002752847682119205,
"loss": 0.1447,
"step": 1750
},
{
"epoch": 5.76,
"grad_norm": 0.4069671928882599,
"learning_rate": 0.0002742913907284768,
"loss": 0.1491,
"step": 1800
},
{
"epoch": 5.92,
"grad_norm": 0.3466169238090515,
"learning_rate": 0.00027329801324503305,
"loss": 0.1573,
"step": 1850
},
{
"epoch": 6.08,
"grad_norm": 0.3684636652469635,
"learning_rate": 0.0002723046357615894,
"loss": 0.1392,
"step": 1900
},
{
"epoch": 6.24,
"grad_norm": 0.4097164273262024,
"learning_rate": 0.00027131125827814566,
"loss": 0.1338,
"step": 1950
},
{
"epoch": 6.4,
"grad_norm": 0.3014273941516876,
"learning_rate": 0.00027033774834437084,
"loss": 0.1333,
"step": 2000
},
{
"epoch": 6.4,
"eval_loss": 0.19138558208942413,
"eval_runtime": 41.0557,
"eval_samples_per_second": 30.446,
"eval_steps_per_second": 3.824,
"eval_wer": 0.14066681131120273,
"step": 2000
},
{
"epoch": 6.5600000000000005,
"grad_norm": 0.2529243528842926,
"learning_rate": 0.0002693443708609271,
"loss": 0.1367,
"step": 2050
},
{
"epoch": 6.72,
"grad_norm": 0.23533473908901215,
"learning_rate": 0.00026835099337748345,
"loss": 0.1439,
"step": 2100
},
{
"epoch": 6.88,
"grad_norm": 0.524277925491333,
"learning_rate": 0.00026735761589403973,
"loss": 0.1379,
"step": 2150
},
{
"epoch": 7.04,
"grad_norm": 0.5774759650230408,
"learning_rate": 0.000266364238410596,
"loss": 0.1354,
"step": 2200
},
{
"epoch": 7.2,
"grad_norm": 0.26473215222358704,
"learning_rate": 0.00026539072847682113,
"loss": 0.143,
"step": 2250
},
{
"epoch": 7.36,
"grad_norm": 0.27728524804115295,
"learning_rate": 0.00026439735099337747,
"loss": 0.1315,
"step": 2300
},
{
"epoch": 7.52,
"grad_norm": 0.7108523845672607,
"learning_rate": 0.00026340397350993374,
"loss": 0.1359,
"step": 2350
},
{
"epoch": 7.68,
"grad_norm": 0.33975887298583984,
"learning_rate": 0.0002624105960264901,
"loss": 0.1337,
"step": 2400
},
{
"epoch": 7.84,
"grad_norm": 0.3907933533191681,
"learning_rate": 0.00026141721854304635,
"loss": 0.126,
"step": 2450
},
{
"epoch": 8.0,
"grad_norm": 0.48056796193122864,
"learning_rate": 0.00026042384105960263,
"loss": 0.1342,
"step": 2500
},
{
"epoch": 8.0,
"eval_loss": 0.2016124725341797,
"eval_runtime": 41.1292,
"eval_samples_per_second": 30.392,
"eval_steps_per_second": 3.817,
"eval_wer": 0.14721197656758517,
"step": 2500
},
{
"epoch": 8.16,
"grad_norm": 0.7174838185310364,
"learning_rate": 0.0002594304635761589,
"loss": 0.1325,
"step": 2550
},
{
"epoch": 8.32,
"grad_norm": 0.7047253251075745,
"learning_rate": 0.0002584370860927152,
"loss": 0.1279,
"step": 2600
},
{
"epoch": 8.48,
"grad_norm": 0.7922475934028625,
"learning_rate": 0.00025746357615894037,
"loss": 0.1304,
"step": 2650
},
{
"epoch": 8.64,
"grad_norm": 0.5174902081489563,
"learning_rate": 0.00025647019867549664,
"loss": 0.1272,
"step": 2700
},
{
"epoch": 8.8,
"grad_norm": 1.5003706216812134,
"learning_rate": 0.000255476821192053,
"loss": 0.1327,
"step": 2750
},
{
"epoch": 8.96,
"grad_norm": 0.7675436735153198,
"learning_rate": 0.00025448344370860925,
"loss": 0.1305,
"step": 2800
},
{
"epoch": 9.12,
"grad_norm": 0.3741365373134613,
"learning_rate": 0.00025349006622516553,
"loss": 0.1118,
"step": 2850
},
{
"epoch": 9.28,
"grad_norm": 0.39765921235084534,
"learning_rate": 0.00025249668874172186,
"loss": 0.1259,
"step": 2900
},
{
"epoch": 9.44,
"grad_norm": 0.32159069180488586,
"learning_rate": 0.00025150331125827814,
"loss": 0.1302,
"step": 2950
},
{
"epoch": 9.6,
"grad_norm": 0.3576621115207672,
"learning_rate": 0.00025052980132450327,
"loss": 0.1252,
"step": 3000
},
{
"epoch": 9.6,
"eval_loss": 0.1925106644630432,
"eval_runtime": 41.148,
"eval_samples_per_second": 30.378,
"eval_steps_per_second": 3.815,
"eval_wer": 0.13513415780718882,
"step": 3000
},
{
"epoch": 9.76,
"grad_norm": 0.43902838230133057,
"learning_rate": 0.0002495364238410596,
"loss": 0.126,
"step": 3050
},
{
"epoch": 9.92,
"grad_norm": 0.49293768405914307,
"learning_rate": 0.0002485430463576159,
"loss": 0.1406,
"step": 3100
},
{
"epoch": 10.08,
"grad_norm": 0.2280844897031784,
"learning_rate": 0.00024754966887417215,
"loss": 0.1191,
"step": 3150
},
{
"epoch": 10.24,
"grad_norm": 0.2204989492893219,
"learning_rate": 0.00024655629139072843,
"loss": 0.1162,
"step": 3200
},
{
"epoch": 10.4,
"grad_norm": 0.8076439499855042,
"learning_rate": 0.00024556291390728476,
"loss": 0.1197,
"step": 3250
},
{
"epoch": 10.56,
"grad_norm": 0.22111420333385468,
"learning_rate": 0.00024456953642384104,
"loss": 0.1156,
"step": 3300
},
{
"epoch": 10.72,
"grad_norm": 0.4509029686450958,
"learning_rate": 0.00024357615894039732,
"loss": 0.1181,
"step": 3350
},
{
"epoch": 10.88,
"grad_norm": 0.5422528386116028,
"learning_rate": 0.00024258278145695365,
"loss": 0.1263,
"step": 3400
},
{
"epoch": 11.04,
"grad_norm": 0.21502262353897095,
"learning_rate": 0.00024160927152317878,
"loss": 0.1157,
"step": 3450
},
{
"epoch": 11.2,
"grad_norm": 0.22735533118247986,
"learning_rate": 0.00024061589403973508,
"loss": 0.111,
"step": 3500
},
{
"epoch": 11.2,
"eval_loss": 0.18385492265224457,
"eval_runtime": 41.1887,
"eval_samples_per_second": 30.348,
"eval_steps_per_second": 3.812,
"eval_wer": 0.1309033051276488,
"step": 3500
}
],
"logging_steps": 50,
"max_steps": 15600,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 1.880734589144977e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}