Whisper-Medium / trainer_state.json

Upload checkpoint directory

ce09534 verified 10 months ago

18.7 kB

	{
	"best_metric": 19.119313582645297,
	"best_model_checkpoint": "./whisper-medium-hi32/checkpoint-1250",
	"epoch": 0.3970775095298602,
	"eval_steps": 1250,
	"global_step": 2500,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0039707750952986025,
	"grad_norm": 5.025813579559326,
	"learning_rate": 2.5e-05,
	"loss": 1.0996,
	"step": 25
	},
	{
	"epoch": 0.007941550190597205,
	"grad_norm": 4.760560989379883,
	"learning_rate": 5e-05,
	"loss": 0.8774,
	"step": 50
	},
	{
	"epoch": 0.011912325285895807,
	"grad_norm": 4.298652648925781,
	"learning_rate": 7.5e-05,
	"loss": 0.3753,
	"step": 75
	},
	{
	"epoch": 0.01588310038119441,
	"grad_norm": 4.617311477661133,
	"learning_rate": 0.0001,
	"loss": 0.3013,
	"step": 100
	},
	{
	"epoch": 0.01985387547649301,
	"grad_norm": 9.100639343261719,
	"learning_rate": 0.000125,
	"loss": 0.2996,
	"step": 125
	},
	{
	"epoch": 0.023824650571791613,
	"grad_norm": 3.8302597999572754,
	"learning_rate": 0.00015,
	"loss": 0.2166,
	"step": 150
	},
	{
	"epoch": 0.027795425667090215,
	"grad_norm": 3.0683753490448,
	"learning_rate": 0.000175,
	"loss": 0.2249,
	"step": 175
	},
	{
	"epoch": 0.03176620076238882,
	"grad_norm": 3.0955100059509277,
	"learning_rate": 0.0002,
	"loss": 0.2256,
	"step": 200
	},
	{
	"epoch": 0.03573697585768742,
	"grad_norm": 8.42872142791748,
	"learning_rate": 0.00022500000000000002,
	"loss": 0.2054,
	"step": 225
	},
	{
	"epoch": 0.03970775095298602,
	"grad_norm": 1.8967641592025757,
	"learning_rate": 0.00025,
	"loss": 0.2378,
	"step": 250
	},
	{
	"epoch": 0.04367852604828463,
	"grad_norm": 3.759864568710327,
	"learning_rate": 0.000275,
	"loss": 0.2663,
	"step": 275
	},
	{
	"epoch": 0.04764930114358323,
	"grad_norm": 3.7866759300231934,
	"learning_rate": 0.0003,
	"loss": 0.2715,
	"step": 300
	},
	{
	"epoch": 0.05162007623888183,
	"grad_norm": 3.0431158542633057,
	"learning_rate": 0.00032500000000000004,
	"loss": 0.2401,
	"step": 325
	},
	{
	"epoch": 0.05559085133418043,
	"grad_norm": 5.701106071472168,
	"learning_rate": 0.00035,
	"loss": 0.2534,
	"step": 350
	},
	{
	"epoch": 0.059561626429479035,
	"grad_norm": 2.9990756511688232,
	"learning_rate": 0.000375,
	"loss": 0.204,
	"step": 375
	},
	{
	"epoch": 0.06353240152477764,
	"grad_norm": 5.0229573249816895,
	"learning_rate": 0.0004,
	"loss": 0.2318,
	"step": 400
	},
	{
	"epoch": 0.06750317662007624,
	"grad_norm": 5.128113746643066,
	"learning_rate": 0.000425,
	"loss": 0.2111,
	"step": 425
	},
	{
	"epoch": 0.07147395171537484,
	"grad_norm": 3.8650593757629395,
	"learning_rate": 0.000449,
	"loss": 0.2493,
	"step": 450
	},
	{
	"epoch": 0.07544472681067345,
	"grad_norm": 7.482765197753906,
	"learning_rate": 0.000474,
	"loss": 0.3081,
	"step": 475
	},
	{
	"epoch": 0.07941550190597205,
	"grad_norm": 4.78742790222168,
	"learning_rate": 0.000499,
	"loss": 0.2288,
	"step": 500
	},
	{
	"epoch": 0.08338627700127065,
	"grad_norm": 1.9080342054367065,
	"learning_rate": 0.00049731843575419,
	"loss": 0.2175,
	"step": 525
	},
	{
	"epoch": 0.08735705209656926,
	"grad_norm": 6.20205545425415,
	"learning_rate": 0.0004945251396648045,
	"loss": 0.2218,
	"step": 550
	},
	{
	"epoch": 0.09132782719186786,
	"grad_norm": 6.478557586669922,
	"learning_rate": 0.000491731843575419,
	"loss": 0.2954,
	"step": 575
	},
	{
	"epoch": 0.09529860228716645,
	"grad_norm": 8.531637191772461,
	"learning_rate": 0.0004889385474860335,
	"loss": 0.2223,
	"step": 600
	},
	{
	"epoch": 0.09926937738246505,
	"grad_norm": 4.512006759643555,
	"learning_rate": 0.0004861452513966481,
	"loss": 0.2208,
	"step": 625
	},
	{
	"epoch": 0.10324015247776366,
	"grad_norm": 2.6392629146575928,
	"learning_rate": 0.0004833519553072626,
	"loss": 0.2348,
	"step": 650
	},
	{
	"epoch": 0.10721092757306226,
	"grad_norm": 3.4534718990325928,
	"learning_rate": 0.00048055865921787713,
	"loss": 0.2385,
	"step": 675
	},
	{
	"epoch": 0.11118170266836086,
	"grad_norm": 3.7822482585906982,
	"learning_rate": 0.00047776536312849164,
	"loss": 0.288,
	"step": 700
	},
	{
	"epoch": 0.11515247776365947,
	"grad_norm": 4.008564472198486,
	"learning_rate": 0.0004749720670391062,
	"loss": 0.2389,
	"step": 725
	},
	{
	"epoch": 0.11912325285895807,
	"grad_norm": 4.458996772766113,
	"learning_rate": 0.0004721787709497207,
	"loss": 0.2875,
	"step": 750
	},
	{
	"epoch": 0.12309402795425667,
	"grad_norm": 6.05142068862915,
	"learning_rate": 0.00046938547486033525,
	"loss": 0.2694,
	"step": 775
	},
	{
	"epoch": 0.12706480304955528,
	"grad_norm": 8.252070426940918,
	"learning_rate": 0.00046659217877094975,
	"loss": 0.2158,
	"step": 800
	},
	{
	"epoch": 0.13103557814485386,
	"grad_norm": 3.173055410385132,
	"learning_rate": 0.0004637988826815643,
	"loss": 0.2154,
	"step": 825
	},
	{
	"epoch": 0.13500635324015248,
	"grad_norm": 1.4051086902618408,
	"learning_rate": 0.00046100558659217876,
	"loss": 0.2297,
	"step": 850
	},
	{
	"epoch": 0.1389771283354511,
	"grad_norm": 10.670642852783203,
	"learning_rate": 0.00045821229050279326,
	"loss": 0.2181,
	"step": 875
	},
	{
	"epoch": 0.14294790343074967,
	"grad_norm": 3.286973476409912,
	"learning_rate": 0.00045553072625698326,
	"loss": 0.2543,
	"step": 900
	},
	{
	"epoch": 0.14691867852604829,
	"grad_norm": 3.300940752029419,
	"learning_rate": 0.00045273743016759776,
	"loss": 0.222,
	"step": 925
	},
	{
	"epoch": 0.1508894536213469,
	"grad_norm": 3.951962947845459,
	"learning_rate": 0.00044994413407821226,
	"loss": 0.2792,
	"step": 950
	},
	{
	"epoch": 0.15486022871664548,
	"grad_norm": 4.659793853759766,
	"learning_rate": 0.0004471508379888268,
	"loss": 0.2409,
	"step": 975
	},
	{
	"epoch": 0.1588310038119441,
	"grad_norm": 8.52952766418457,
	"learning_rate": 0.0004443575418994413,
	"loss": 0.2745,
	"step": 1000
	},
	{
	"epoch": 0.1628017789072427,
	"grad_norm": 4.599229335784912,
	"learning_rate": 0.0004415642458100559,
	"loss": 0.2656,
	"step": 1025
	},
	{
	"epoch": 0.1667725540025413,
	"grad_norm": 3.206956386566162,
	"learning_rate": 0.00043888268156424583,
	"loss": 0.2268,
	"step": 1050
	},
	{
	"epoch": 0.1707433290978399,
	"grad_norm": 4.573523998260498,
	"learning_rate": 0.00043608938547486033,
	"loss": 0.2322,
	"step": 1075
	},
	{
	"epoch": 0.17471410419313851,
	"grad_norm": 2.311208963394165,
	"learning_rate": 0.0004332960893854749,
	"loss": 0.2445,
	"step": 1100
	},
	{
	"epoch": 0.1786848792884371,
	"grad_norm": 12.883535385131836,
	"learning_rate": 0.0004305027932960894,
	"loss": 0.2932,
	"step": 1125
	},
	{
	"epoch": 0.1826556543837357,
	"grad_norm": 3.4275145530700684,
	"learning_rate": 0.00042770949720670394,
	"loss": 0.2642,
	"step": 1150
	},
	{
	"epoch": 0.1866264294790343,
	"grad_norm": 4.927622318267822,
	"learning_rate": 0.00042491620111731845,
	"loss": 0.2854,
	"step": 1175
	},
	{
	"epoch": 0.1905972045743329,
	"grad_norm": 3.562868595123291,
	"learning_rate": 0.000422122905027933,
	"loss": 0.2408,
	"step": 1200
	},
	{
	"epoch": 0.19456797966963152,
	"grad_norm": 6.159071445465088,
	"learning_rate": 0.0004193296089385475,
	"loss": 0.262,
	"step": 1225
	},
	{
	"epoch": 0.1985387547649301,
	"grad_norm": 6.2040114402771,
	"learning_rate": 0.00041653631284916206,
	"loss": 0.3184,
	"step": 1250
	},
	{
	"epoch": 0.1985387547649301,
	"eval_loss": 0.2267104983329773,
	"eval_runtime": 1316.6693,
	"eval_samples_per_second": 0.787,
	"eval_steps_per_second": 0.393,
	"eval_wer": 19.119313582645297,
	"step": 1250
	},
	{
	"epoch": 0.20250952986022872,
	"grad_norm": 5.0747246742248535,
	"learning_rate": 0.00041374301675977656,
	"loss": 0.2806,
	"step": 1275
	},
	{
	"epoch": 0.20648030495552733,
	"grad_norm": 7.066751003265381,
	"learning_rate": 0.00041094972067039106,
	"loss": 0.2596,
	"step": 1300
	},
	{
	"epoch": 0.2104510800508259,
	"grad_norm": 3.551501989364624,
	"learning_rate": 0.0004081564245810056,
	"loss": 0.2609,
	"step": 1325
	},
	{
	"epoch": 0.21442185514612452,
	"grad_norm": 4.077850341796875,
	"learning_rate": 0.0004053631284916201,
	"loss": 0.2232,
	"step": 1350
	},
	{
	"epoch": 0.21839263024142314,
	"grad_norm": 3.4992265701293945,
	"learning_rate": 0.0004025698324022347,
	"loss": 0.2224,
	"step": 1375
	},
	{
	"epoch": 0.22236340533672172,
	"grad_norm": 3.0660696029663086,
	"learning_rate": 0.0003997765363128492,
	"loss": 0.2006,
	"step": 1400
	},
	{
	"epoch": 0.22633418043202033,
	"grad_norm": 8.794944763183594,
	"learning_rate": 0.00039698324022346374,
	"loss": 0.2199,
	"step": 1425
	},
	{
	"epoch": 0.23030495552731894,
	"grad_norm": 3.5407402515411377,
	"learning_rate": 0.00039418994413407824,
	"loss": 0.2547,
	"step": 1450
	},
	{
	"epoch": 0.23427573062261753,
	"grad_norm": 5.331219673156738,
	"learning_rate": 0.0003913966480446928,
	"loss": 0.2058,
	"step": 1475
	},
	{
	"epoch": 0.23824650571791614,
	"grad_norm": 7.421133995056152,
	"learning_rate": 0.0003886033519553073,
	"loss": 0.2284,
	"step": 1500
	},
	{
	"epoch": 0.24221728081321475,
	"grad_norm": 6.895657539367676,
	"learning_rate": 0.00038581005586592175,
	"loss": 0.2323,
	"step": 1525
	},
	{
	"epoch": 0.24618805590851334,
	"grad_norm": 1.489917278289795,
	"learning_rate": 0.0003830167597765363,
	"loss": 0.2418,
	"step": 1550
	},
	{
	"epoch": 0.2501588310038119,
	"grad_norm": 3.7984824180603027,
	"learning_rate": 0.0003802234636871508,
	"loss": 0.2223,
	"step": 1575
	},
	{
	"epoch": 0.25412960609911056,
	"grad_norm": 3.4480276107788086,
	"learning_rate": 0.00037743016759776536,
	"loss": 0.2672,
	"step": 1600
	},
	{
	"epoch": 0.25810038119440915,
	"grad_norm": 5.31698751449585,
	"learning_rate": 0.00037463687150837986,
	"loss": 0.2231,
	"step": 1625
	},
	{
	"epoch": 0.26207115628970773,
	"grad_norm": 3.1410892009735107,
	"learning_rate": 0.0003718435754189944,
	"loss": 0.248,
	"step": 1650
	},
	{
	"epoch": 0.26604193138500637,
	"grad_norm": 3.897778272628784,
	"learning_rate": 0.0003690502793296089,
	"loss": 0.271,
	"step": 1675
	},
	{
	"epoch": 0.27001270648030495,
	"grad_norm": 2.355808734893799,
	"learning_rate": 0.0003662569832402235,
	"loss": 0.2293,
	"step": 1700
	},
	{
	"epoch": 0.27398348157560354,
	"grad_norm": 3.172232151031494,
	"learning_rate": 0.000363463687150838,
	"loss": 0.1907,
	"step": 1725
	},
	{
	"epoch": 0.2779542566709022,
	"grad_norm": 5.099759578704834,
	"learning_rate": 0.0003606703910614525,
	"loss": 0.2054,
	"step": 1750
	},
	{
	"epoch": 0.28192503176620076,
	"grad_norm": 5.372403144836426,
	"learning_rate": 0.00035787709497206704,
	"loss": 0.2279,
	"step": 1775
	},
	{
	"epoch": 0.28589580686149935,
	"grad_norm": 7.999370574951172,
	"learning_rate": 0.00035508379888268154,
	"loss": 0.1572,
	"step": 1800
	},
	{
	"epoch": 0.289866581956798,
	"grad_norm": 12.339667320251465,
	"learning_rate": 0.0003522905027932961,
	"loss": 0.2401,
	"step": 1825
	},
	{
	"epoch": 0.29383735705209657,
	"grad_norm": 7.104443073272705,
	"learning_rate": 0.0003494972067039106,
	"loss": 0.2333,
	"step": 1850
	},
	{
	"epoch": 0.29780813214739515,
	"grad_norm": 8.25573444366455,
	"learning_rate": 0.00034670391061452515,
	"loss": 0.1648,
	"step": 1875
	},
	{
	"epoch": 0.3017789072426938,
	"grad_norm": 5.5282769203186035,
	"learning_rate": 0.00034391061452513966,
	"loss": 0.2446,
	"step": 1900
	},
	{
	"epoch": 0.3057496823379924,
	"grad_norm": 8.142168045043945,
	"learning_rate": 0.0003411173184357542,
	"loss": 0.2474,
	"step": 1925
	},
	{
	"epoch": 0.30972045743329096,
	"grad_norm": 7.110402584075928,
	"learning_rate": 0.0003383240223463687,
	"loss": 0.184,
	"step": 1950
	},
	{
	"epoch": 0.3136912325285896,
	"grad_norm": 6.967545509338379,
	"learning_rate": 0.0003355307262569832,
	"loss": 0.2391,
	"step": 1975
	},
	{
	"epoch": 0.3176620076238882,
	"grad_norm": 7.029871463775635,
	"learning_rate": 0.0003327374301675978,
	"loss": 0.2114,
	"step": 2000
	},
	{
	"epoch": 0.32163278271918677,
	"grad_norm": 8.513774871826172,
	"learning_rate": 0.0003299441340782123,
	"loss": 0.2483,
	"step": 2025
	},
	{
	"epoch": 0.3256035578144854,
	"grad_norm": 6.16885232925415,
	"learning_rate": 0.00032715083798882683,
	"loss": 0.2206,
	"step": 2050
	},
	{
	"epoch": 0.329574332909784,
	"grad_norm": 6.541902542114258,
	"learning_rate": 0.00032435754189944133,
	"loss": 0.223,
	"step": 2075
	},
	{
	"epoch": 0.3335451080050826,
	"grad_norm": 3.3565759658813477,
	"learning_rate": 0.0003215642458100559,
	"loss": 0.1957,
	"step": 2100
	},
	{
	"epoch": 0.3375158831003812,
	"grad_norm": 1.907651424407959,
	"learning_rate": 0.0003187709497206704,
	"loss": 0.271,
	"step": 2125
	},
	{
	"epoch": 0.3414866581956798,
	"grad_norm": 4.328953742980957,
	"learning_rate": 0.00031597765363128495,
	"loss": 0.2432,
	"step": 2150
	},
	{
	"epoch": 0.3454574332909784,
	"grad_norm": 2.5049920082092285,
	"learning_rate": 0.00031318435754189945,
	"loss": 0.2074,
	"step": 2175
	},
	{
	"epoch": 0.34942820838627703,
	"grad_norm": 7.09634256362915,
	"learning_rate": 0.00031039106145251395,
	"loss": 0.1959,
	"step": 2200
	},
	{
	"epoch": 0.3533989834815756,
	"grad_norm": 3.597811698913574,
	"learning_rate": 0.0003075977653631285,
	"loss": 0.2009,
	"step": 2225
	},
	{
	"epoch": 0.3573697585768742,
	"grad_norm": 4.403627872467041,
	"learning_rate": 0.000304804469273743,
	"loss": 0.1956,
	"step": 2250
	},
	{
	"epoch": 0.3613405336721728,
	"grad_norm": 14.672781944274902,
	"learning_rate": 0.00030201117318435757,
	"loss": 0.2425,
	"step": 2275
	},
	{
	"epoch": 0.3653113087674714,
	"grad_norm": 3.903442621231079,
	"learning_rate": 0.00029921787709497207,
	"loss": 0.2176,
	"step": 2300
	},
	{
	"epoch": 0.36928208386277,
	"grad_norm": 6.7449517250061035,
	"learning_rate": 0.0002964245810055866,
	"loss": 0.2667,
	"step": 2325
	},
	{
	"epoch": 0.3732528589580686,
	"grad_norm": 6.445186614990234,
	"learning_rate": 0.0002937430167597766,
	"loss": 0.2148,
	"step": 2350
	},
	{
	"epoch": 0.37722363405336723,
	"grad_norm": 5.590458869934082,
	"learning_rate": 0.0002909497206703911,
	"loss": 0.1853,
	"step": 2375
	},
	{
	"epoch": 0.3811944091486658,
	"grad_norm": 3.372239351272583,
	"learning_rate": 0.00028815642458100563,
	"loss": 0.2079,
	"step": 2400
	},
	{
	"epoch": 0.3851651842439644,
	"grad_norm": 8.069969177246094,
	"learning_rate": 0.0002853631284916201,
	"loss": 0.2099,
	"step": 2425
	},
	{
	"epoch": 0.38913595933926304,
	"grad_norm": 3.9822771549224854,
	"learning_rate": 0.00028256983240223464,
	"loss": 0.2716,
	"step": 2450
	},
	{
	"epoch": 0.3931067344345616,
	"grad_norm": 7.241533279418945,
	"learning_rate": 0.00027977653631284914,
	"loss": 0.2138,
	"step": 2475
	},
	{
	"epoch": 0.3970775095298602,
	"grad_norm": 4.6112542152404785,
	"learning_rate": 0.0002769832402234637,
	"loss": 0.1957,
	"step": 2500
	},
	{
	"epoch": 0.3970775095298602,
	"eval_loss": 0.20951329171657562,
	"eval_runtime": 1414.685,
	"eval_samples_per_second": 0.732,
	"eval_steps_per_second": 0.366,
	"eval_wer": 32.51578436134046,
	"step": 2500
	}
	],
	"logging_steps": 25,
	"max_steps": 4975,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 2500,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 2,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 1
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.09103814656e+19,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}