Whisper-Medium / trainer_state.json
Marwan-Kasem's picture
Upload checkpoint directory
ce09534 verified
{
"best_metric": 19.119313582645297,
"best_model_checkpoint": "./whisper-medium-hi32/checkpoint-1250",
"epoch": 0.3970775095298602,
"eval_steps": 1250,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0039707750952986025,
"grad_norm": 5.025813579559326,
"learning_rate": 2.5e-05,
"loss": 1.0996,
"step": 25
},
{
"epoch": 0.007941550190597205,
"grad_norm": 4.760560989379883,
"learning_rate": 5e-05,
"loss": 0.8774,
"step": 50
},
{
"epoch": 0.011912325285895807,
"grad_norm": 4.298652648925781,
"learning_rate": 7.5e-05,
"loss": 0.3753,
"step": 75
},
{
"epoch": 0.01588310038119441,
"grad_norm": 4.617311477661133,
"learning_rate": 0.0001,
"loss": 0.3013,
"step": 100
},
{
"epoch": 0.01985387547649301,
"grad_norm": 9.100639343261719,
"learning_rate": 0.000125,
"loss": 0.2996,
"step": 125
},
{
"epoch": 0.023824650571791613,
"grad_norm": 3.8302597999572754,
"learning_rate": 0.00015,
"loss": 0.2166,
"step": 150
},
{
"epoch": 0.027795425667090215,
"grad_norm": 3.0683753490448,
"learning_rate": 0.000175,
"loss": 0.2249,
"step": 175
},
{
"epoch": 0.03176620076238882,
"grad_norm": 3.0955100059509277,
"learning_rate": 0.0002,
"loss": 0.2256,
"step": 200
},
{
"epoch": 0.03573697585768742,
"grad_norm": 8.42872142791748,
"learning_rate": 0.00022500000000000002,
"loss": 0.2054,
"step": 225
},
{
"epoch": 0.03970775095298602,
"grad_norm": 1.8967641592025757,
"learning_rate": 0.00025,
"loss": 0.2378,
"step": 250
},
{
"epoch": 0.04367852604828463,
"grad_norm": 3.759864568710327,
"learning_rate": 0.000275,
"loss": 0.2663,
"step": 275
},
{
"epoch": 0.04764930114358323,
"grad_norm": 3.7866759300231934,
"learning_rate": 0.0003,
"loss": 0.2715,
"step": 300
},
{
"epoch": 0.05162007623888183,
"grad_norm": 3.0431158542633057,
"learning_rate": 0.00032500000000000004,
"loss": 0.2401,
"step": 325
},
{
"epoch": 0.05559085133418043,
"grad_norm": 5.701106071472168,
"learning_rate": 0.00035,
"loss": 0.2534,
"step": 350
},
{
"epoch": 0.059561626429479035,
"grad_norm": 2.9990756511688232,
"learning_rate": 0.000375,
"loss": 0.204,
"step": 375
},
{
"epoch": 0.06353240152477764,
"grad_norm": 5.0229573249816895,
"learning_rate": 0.0004,
"loss": 0.2318,
"step": 400
},
{
"epoch": 0.06750317662007624,
"grad_norm": 5.128113746643066,
"learning_rate": 0.000425,
"loss": 0.2111,
"step": 425
},
{
"epoch": 0.07147395171537484,
"grad_norm": 3.8650593757629395,
"learning_rate": 0.000449,
"loss": 0.2493,
"step": 450
},
{
"epoch": 0.07544472681067345,
"grad_norm": 7.482765197753906,
"learning_rate": 0.000474,
"loss": 0.3081,
"step": 475
},
{
"epoch": 0.07941550190597205,
"grad_norm": 4.78742790222168,
"learning_rate": 0.000499,
"loss": 0.2288,
"step": 500
},
{
"epoch": 0.08338627700127065,
"grad_norm": 1.9080342054367065,
"learning_rate": 0.00049731843575419,
"loss": 0.2175,
"step": 525
},
{
"epoch": 0.08735705209656926,
"grad_norm": 6.20205545425415,
"learning_rate": 0.0004945251396648045,
"loss": 0.2218,
"step": 550
},
{
"epoch": 0.09132782719186786,
"grad_norm": 6.478557586669922,
"learning_rate": 0.000491731843575419,
"loss": 0.2954,
"step": 575
},
{
"epoch": 0.09529860228716645,
"grad_norm": 8.531637191772461,
"learning_rate": 0.0004889385474860335,
"loss": 0.2223,
"step": 600
},
{
"epoch": 0.09926937738246505,
"grad_norm": 4.512006759643555,
"learning_rate": 0.0004861452513966481,
"loss": 0.2208,
"step": 625
},
{
"epoch": 0.10324015247776366,
"grad_norm": 2.6392629146575928,
"learning_rate": 0.0004833519553072626,
"loss": 0.2348,
"step": 650
},
{
"epoch": 0.10721092757306226,
"grad_norm": 3.4534718990325928,
"learning_rate": 0.00048055865921787713,
"loss": 0.2385,
"step": 675
},
{
"epoch": 0.11118170266836086,
"grad_norm": 3.7822482585906982,
"learning_rate": 0.00047776536312849164,
"loss": 0.288,
"step": 700
},
{
"epoch": 0.11515247776365947,
"grad_norm": 4.008564472198486,
"learning_rate": 0.0004749720670391062,
"loss": 0.2389,
"step": 725
},
{
"epoch": 0.11912325285895807,
"grad_norm": 4.458996772766113,
"learning_rate": 0.0004721787709497207,
"loss": 0.2875,
"step": 750
},
{
"epoch": 0.12309402795425667,
"grad_norm": 6.05142068862915,
"learning_rate": 0.00046938547486033525,
"loss": 0.2694,
"step": 775
},
{
"epoch": 0.12706480304955528,
"grad_norm": 8.252070426940918,
"learning_rate": 0.00046659217877094975,
"loss": 0.2158,
"step": 800
},
{
"epoch": 0.13103557814485386,
"grad_norm": 3.173055410385132,
"learning_rate": 0.0004637988826815643,
"loss": 0.2154,
"step": 825
},
{
"epoch": 0.13500635324015248,
"grad_norm": 1.4051086902618408,
"learning_rate": 0.00046100558659217876,
"loss": 0.2297,
"step": 850
},
{
"epoch": 0.1389771283354511,
"grad_norm": 10.670642852783203,
"learning_rate": 0.00045821229050279326,
"loss": 0.2181,
"step": 875
},
{
"epoch": 0.14294790343074967,
"grad_norm": 3.286973476409912,
"learning_rate": 0.00045553072625698326,
"loss": 0.2543,
"step": 900
},
{
"epoch": 0.14691867852604829,
"grad_norm": 3.300940752029419,
"learning_rate": 0.00045273743016759776,
"loss": 0.222,
"step": 925
},
{
"epoch": 0.1508894536213469,
"grad_norm": 3.951962947845459,
"learning_rate": 0.00044994413407821226,
"loss": 0.2792,
"step": 950
},
{
"epoch": 0.15486022871664548,
"grad_norm": 4.659793853759766,
"learning_rate": 0.0004471508379888268,
"loss": 0.2409,
"step": 975
},
{
"epoch": 0.1588310038119441,
"grad_norm": 8.52952766418457,
"learning_rate": 0.0004443575418994413,
"loss": 0.2745,
"step": 1000
},
{
"epoch": 0.1628017789072427,
"grad_norm": 4.599229335784912,
"learning_rate": 0.0004415642458100559,
"loss": 0.2656,
"step": 1025
},
{
"epoch": 0.1667725540025413,
"grad_norm": 3.206956386566162,
"learning_rate": 0.00043888268156424583,
"loss": 0.2268,
"step": 1050
},
{
"epoch": 0.1707433290978399,
"grad_norm": 4.573523998260498,
"learning_rate": 0.00043608938547486033,
"loss": 0.2322,
"step": 1075
},
{
"epoch": 0.17471410419313851,
"grad_norm": 2.311208963394165,
"learning_rate": 0.0004332960893854749,
"loss": 0.2445,
"step": 1100
},
{
"epoch": 0.1786848792884371,
"grad_norm": 12.883535385131836,
"learning_rate": 0.0004305027932960894,
"loss": 0.2932,
"step": 1125
},
{
"epoch": 0.1826556543837357,
"grad_norm": 3.4275145530700684,
"learning_rate": 0.00042770949720670394,
"loss": 0.2642,
"step": 1150
},
{
"epoch": 0.1866264294790343,
"grad_norm": 4.927622318267822,
"learning_rate": 0.00042491620111731845,
"loss": 0.2854,
"step": 1175
},
{
"epoch": 0.1905972045743329,
"grad_norm": 3.562868595123291,
"learning_rate": 0.000422122905027933,
"loss": 0.2408,
"step": 1200
},
{
"epoch": 0.19456797966963152,
"grad_norm": 6.159071445465088,
"learning_rate": 0.0004193296089385475,
"loss": 0.262,
"step": 1225
},
{
"epoch": 0.1985387547649301,
"grad_norm": 6.2040114402771,
"learning_rate": 0.00041653631284916206,
"loss": 0.3184,
"step": 1250
},
{
"epoch": 0.1985387547649301,
"eval_loss": 0.2267104983329773,
"eval_runtime": 1316.6693,
"eval_samples_per_second": 0.787,
"eval_steps_per_second": 0.393,
"eval_wer": 19.119313582645297,
"step": 1250
},
{
"epoch": 0.20250952986022872,
"grad_norm": 5.0747246742248535,
"learning_rate": 0.00041374301675977656,
"loss": 0.2806,
"step": 1275
},
{
"epoch": 0.20648030495552733,
"grad_norm": 7.066751003265381,
"learning_rate": 0.00041094972067039106,
"loss": 0.2596,
"step": 1300
},
{
"epoch": 0.2104510800508259,
"grad_norm": 3.551501989364624,
"learning_rate": 0.0004081564245810056,
"loss": 0.2609,
"step": 1325
},
{
"epoch": 0.21442185514612452,
"grad_norm": 4.077850341796875,
"learning_rate": 0.0004053631284916201,
"loss": 0.2232,
"step": 1350
},
{
"epoch": 0.21839263024142314,
"grad_norm": 3.4992265701293945,
"learning_rate": 0.0004025698324022347,
"loss": 0.2224,
"step": 1375
},
{
"epoch": 0.22236340533672172,
"grad_norm": 3.0660696029663086,
"learning_rate": 0.0003997765363128492,
"loss": 0.2006,
"step": 1400
},
{
"epoch": 0.22633418043202033,
"grad_norm": 8.794944763183594,
"learning_rate": 0.00039698324022346374,
"loss": 0.2199,
"step": 1425
},
{
"epoch": 0.23030495552731894,
"grad_norm": 3.5407402515411377,
"learning_rate": 0.00039418994413407824,
"loss": 0.2547,
"step": 1450
},
{
"epoch": 0.23427573062261753,
"grad_norm": 5.331219673156738,
"learning_rate": 0.0003913966480446928,
"loss": 0.2058,
"step": 1475
},
{
"epoch": 0.23824650571791614,
"grad_norm": 7.421133995056152,
"learning_rate": 0.0003886033519553073,
"loss": 0.2284,
"step": 1500
},
{
"epoch": 0.24221728081321475,
"grad_norm": 6.895657539367676,
"learning_rate": 0.00038581005586592175,
"loss": 0.2323,
"step": 1525
},
{
"epoch": 0.24618805590851334,
"grad_norm": 1.489917278289795,
"learning_rate": 0.0003830167597765363,
"loss": 0.2418,
"step": 1550
},
{
"epoch": 0.2501588310038119,
"grad_norm": 3.7984824180603027,
"learning_rate": 0.0003802234636871508,
"loss": 0.2223,
"step": 1575
},
{
"epoch": 0.25412960609911056,
"grad_norm": 3.4480276107788086,
"learning_rate": 0.00037743016759776536,
"loss": 0.2672,
"step": 1600
},
{
"epoch": 0.25810038119440915,
"grad_norm": 5.31698751449585,
"learning_rate": 0.00037463687150837986,
"loss": 0.2231,
"step": 1625
},
{
"epoch": 0.26207115628970773,
"grad_norm": 3.1410892009735107,
"learning_rate": 0.0003718435754189944,
"loss": 0.248,
"step": 1650
},
{
"epoch": 0.26604193138500637,
"grad_norm": 3.897778272628784,
"learning_rate": 0.0003690502793296089,
"loss": 0.271,
"step": 1675
},
{
"epoch": 0.27001270648030495,
"grad_norm": 2.355808734893799,
"learning_rate": 0.0003662569832402235,
"loss": 0.2293,
"step": 1700
},
{
"epoch": 0.27398348157560354,
"grad_norm": 3.172232151031494,
"learning_rate": 0.000363463687150838,
"loss": 0.1907,
"step": 1725
},
{
"epoch": 0.2779542566709022,
"grad_norm": 5.099759578704834,
"learning_rate": 0.0003606703910614525,
"loss": 0.2054,
"step": 1750
},
{
"epoch": 0.28192503176620076,
"grad_norm": 5.372403144836426,
"learning_rate": 0.00035787709497206704,
"loss": 0.2279,
"step": 1775
},
{
"epoch": 0.28589580686149935,
"grad_norm": 7.999370574951172,
"learning_rate": 0.00035508379888268154,
"loss": 0.1572,
"step": 1800
},
{
"epoch": 0.289866581956798,
"grad_norm": 12.339667320251465,
"learning_rate": 0.0003522905027932961,
"loss": 0.2401,
"step": 1825
},
{
"epoch": 0.29383735705209657,
"grad_norm": 7.104443073272705,
"learning_rate": 0.0003494972067039106,
"loss": 0.2333,
"step": 1850
},
{
"epoch": 0.29780813214739515,
"grad_norm": 8.25573444366455,
"learning_rate": 0.00034670391061452515,
"loss": 0.1648,
"step": 1875
},
{
"epoch": 0.3017789072426938,
"grad_norm": 5.5282769203186035,
"learning_rate": 0.00034391061452513966,
"loss": 0.2446,
"step": 1900
},
{
"epoch": 0.3057496823379924,
"grad_norm": 8.142168045043945,
"learning_rate": 0.0003411173184357542,
"loss": 0.2474,
"step": 1925
},
{
"epoch": 0.30972045743329096,
"grad_norm": 7.110402584075928,
"learning_rate": 0.0003383240223463687,
"loss": 0.184,
"step": 1950
},
{
"epoch": 0.3136912325285896,
"grad_norm": 6.967545509338379,
"learning_rate": 0.0003355307262569832,
"loss": 0.2391,
"step": 1975
},
{
"epoch": 0.3176620076238882,
"grad_norm": 7.029871463775635,
"learning_rate": 0.0003327374301675978,
"loss": 0.2114,
"step": 2000
},
{
"epoch": 0.32163278271918677,
"grad_norm": 8.513774871826172,
"learning_rate": 0.0003299441340782123,
"loss": 0.2483,
"step": 2025
},
{
"epoch": 0.3256035578144854,
"grad_norm": 6.16885232925415,
"learning_rate": 0.00032715083798882683,
"loss": 0.2206,
"step": 2050
},
{
"epoch": 0.329574332909784,
"grad_norm": 6.541902542114258,
"learning_rate": 0.00032435754189944133,
"loss": 0.223,
"step": 2075
},
{
"epoch": 0.3335451080050826,
"grad_norm": 3.3565759658813477,
"learning_rate": 0.0003215642458100559,
"loss": 0.1957,
"step": 2100
},
{
"epoch": 0.3375158831003812,
"grad_norm": 1.907651424407959,
"learning_rate": 0.0003187709497206704,
"loss": 0.271,
"step": 2125
},
{
"epoch": 0.3414866581956798,
"grad_norm": 4.328953742980957,
"learning_rate": 0.00031597765363128495,
"loss": 0.2432,
"step": 2150
},
{
"epoch": 0.3454574332909784,
"grad_norm": 2.5049920082092285,
"learning_rate": 0.00031318435754189945,
"loss": 0.2074,
"step": 2175
},
{
"epoch": 0.34942820838627703,
"grad_norm": 7.09634256362915,
"learning_rate": 0.00031039106145251395,
"loss": 0.1959,
"step": 2200
},
{
"epoch": 0.3533989834815756,
"grad_norm": 3.597811698913574,
"learning_rate": 0.0003075977653631285,
"loss": 0.2009,
"step": 2225
},
{
"epoch": 0.3573697585768742,
"grad_norm": 4.403627872467041,
"learning_rate": 0.000304804469273743,
"loss": 0.1956,
"step": 2250
},
{
"epoch": 0.3613405336721728,
"grad_norm": 14.672781944274902,
"learning_rate": 0.00030201117318435757,
"loss": 0.2425,
"step": 2275
},
{
"epoch": 0.3653113087674714,
"grad_norm": 3.903442621231079,
"learning_rate": 0.00029921787709497207,
"loss": 0.2176,
"step": 2300
},
{
"epoch": 0.36928208386277,
"grad_norm": 6.7449517250061035,
"learning_rate": 0.0002964245810055866,
"loss": 0.2667,
"step": 2325
},
{
"epoch": 0.3732528589580686,
"grad_norm": 6.445186614990234,
"learning_rate": 0.0002937430167597766,
"loss": 0.2148,
"step": 2350
},
{
"epoch": 0.37722363405336723,
"grad_norm": 5.590458869934082,
"learning_rate": 0.0002909497206703911,
"loss": 0.1853,
"step": 2375
},
{
"epoch": 0.3811944091486658,
"grad_norm": 3.372239351272583,
"learning_rate": 0.00028815642458100563,
"loss": 0.2079,
"step": 2400
},
{
"epoch": 0.3851651842439644,
"grad_norm": 8.069969177246094,
"learning_rate": 0.0002853631284916201,
"loss": 0.2099,
"step": 2425
},
{
"epoch": 0.38913595933926304,
"grad_norm": 3.9822771549224854,
"learning_rate": 0.00028256983240223464,
"loss": 0.2716,
"step": 2450
},
{
"epoch": 0.3931067344345616,
"grad_norm": 7.241533279418945,
"learning_rate": 0.00027977653631284914,
"loss": 0.2138,
"step": 2475
},
{
"epoch": 0.3970775095298602,
"grad_norm": 4.6112542152404785,
"learning_rate": 0.0002769832402234637,
"loss": 0.1957,
"step": 2500
},
{
"epoch": 0.3970775095298602,
"eval_loss": 0.20951329171657562,
"eval_runtime": 1414.685,
"eval_samples_per_second": 0.732,
"eval_steps_per_second": 0.366,
"eval_wer": 32.51578436134046,
"step": 2500
}
],
"logging_steps": 25,
"max_steps": 4975,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.09103814656e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}