w6 / trainer_state.json
manuth's picture
Uploading the model
e75cce8 verified
{
"best_global_step": 1400,
"best_metric": 0.062412777174755775,
"best_model_checkpoint": "/content/drive/MyDrive/ABA Projects/Speech-To-Text/models/Under9/KB_800_aug_time1x5_cc_sample/checkpoint-1400",
"epoch": 3.0,
"eval_steps": 200,
"global_step": 1617,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03710575139146568,
"grad_norm": 1.6889166831970215,
"learning_rate": 3.8e-07,
"loss": 0.0498,
"step": 20
},
{
"epoch": 0.07421150278293136,
"grad_norm": 1.2946438789367676,
"learning_rate": 7.8e-07,
"loss": 0.0402,
"step": 40
},
{
"epoch": 0.11131725417439703,
"grad_norm": 1.791599154472351,
"learning_rate": 1.1800000000000001e-06,
"loss": 0.0364,
"step": 60
},
{
"epoch": 0.14842300556586271,
"grad_norm": 1.2460503578186035,
"learning_rate": 1.5800000000000001e-06,
"loss": 0.0308,
"step": 80
},
{
"epoch": 0.18552875695732837,
"grad_norm": 0.5869728326797485,
"learning_rate": 1.98e-06,
"loss": 0.0251,
"step": 100
},
{
"epoch": 0.22263450834879406,
"grad_norm": 0.9444546699523926,
"learning_rate": 2.38e-06,
"loss": 0.0253,
"step": 120
},
{
"epoch": 0.2597402597402597,
"grad_norm": 1.2589209079742432,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.021,
"step": 140
},
{
"epoch": 0.29684601113172543,
"grad_norm": 0.8687691688537598,
"learning_rate": 3.1800000000000005e-06,
"loss": 0.0164,
"step": 160
},
{
"epoch": 0.3339517625231911,
"grad_norm": 0.7628278136253357,
"learning_rate": 3.58e-06,
"loss": 0.0184,
"step": 180
},
{
"epoch": 0.37105751391465674,
"grad_norm": 0.7672299742698669,
"learning_rate": 3.980000000000001e-06,
"loss": 0.016,
"step": 200
},
{
"epoch": 0.37105751391465674,
"eval_loss": 0.0242678951472044,
"eval_runtime": 531.9148,
"eval_samples_per_second": 1.705,
"eval_steps_per_second": 0.055,
"eval_wer": 0.09970538067917506,
"step": 200
},
{
"epoch": 0.40816326530612246,
"grad_norm": 0.6221576929092407,
"learning_rate": 4.38e-06,
"loss": 0.0166,
"step": 220
},
{
"epoch": 0.4452690166975881,
"grad_norm": 0.6783678531646729,
"learning_rate": 4.78e-06,
"loss": 0.0158,
"step": 240
},
{
"epoch": 0.48237476808905383,
"grad_norm": 1.1848996877670288,
"learning_rate": 5.18e-06,
"loss": 0.0133,
"step": 260
},
{
"epoch": 0.5194805194805194,
"grad_norm": 0.6004874110221863,
"learning_rate": 5.580000000000001e-06,
"loss": 0.0142,
"step": 280
},
{
"epoch": 0.5565862708719852,
"grad_norm": 0.8510653972625732,
"learning_rate": 5.98e-06,
"loss": 0.0159,
"step": 300
},
{
"epoch": 0.5936920222634509,
"grad_norm": 0.74302738904953,
"learning_rate": 6.380000000000001e-06,
"loss": 0.0164,
"step": 320
},
{
"epoch": 0.6307977736549165,
"grad_norm": 0.7362753748893738,
"learning_rate": 6.780000000000001e-06,
"loss": 0.0145,
"step": 340
},
{
"epoch": 0.6679035250463822,
"grad_norm": 0.762110710144043,
"learning_rate": 7.180000000000001e-06,
"loss": 0.0122,
"step": 360
},
{
"epoch": 0.7050092764378478,
"grad_norm": 1.0113365650177002,
"learning_rate": 7.58e-06,
"loss": 0.0124,
"step": 380
},
{
"epoch": 0.7421150278293135,
"grad_norm": 1.097764253616333,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0128,
"step": 400
},
{
"epoch": 0.7421150278293135,
"eval_loss": 0.0212822575122118,
"eval_runtime": 527.7384,
"eval_samples_per_second": 1.719,
"eval_steps_per_second": 0.055,
"eval_wer": 0.08885098464878276,
"step": 400
},
{
"epoch": 0.7792207792207793,
"grad_norm": 0.7167023420333862,
"learning_rate": 8.380000000000001e-06,
"loss": 0.0122,
"step": 420
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.9013610482215881,
"learning_rate": 8.78e-06,
"loss": 0.014,
"step": 440
},
{
"epoch": 0.8534322820037106,
"grad_norm": 0.8653533458709717,
"learning_rate": 9.180000000000002e-06,
"loss": 0.0154,
"step": 460
},
{
"epoch": 0.8905380333951762,
"grad_norm": 0.557327151298523,
"learning_rate": 9.58e-06,
"loss": 0.0145,
"step": 480
},
{
"epoch": 0.9276437847866419,
"grad_norm": 2.3209738731384277,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0145,
"step": 500
},
{
"epoch": 0.9647495361781077,
"grad_norm": 1.026639461517334,
"learning_rate": 9.829901521933752e-06,
"loss": 0.0123,
"step": 520
},
{
"epoch": 1.0018552875695732,
"grad_norm": 0.7530286908149719,
"learning_rate": 9.650850492390333e-06,
"loss": 0.0173,
"step": 540
},
{
"epoch": 1.0389610389610389,
"grad_norm": 0.634614884853363,
"learning_rate": 9.471799462846912e-06,
"loss": 0.0103,
"step": 560
},
{
"epoch": 1.0760667903525047,
"grad_norm": 0.6744325160980225,
"learning_rate": 9.292748433303492e-06,
"loss": 0.0078,
"step": 580
},
{
"epoch": 1.1131725417439704,
"grad_norm": 0.7800220847129822,
"learning_rate": 9.113697403760073e-06,
"loss": 0.0094,
"step": 600
},
{
"epoch": 1.1131725417439704,
"eval_loss": 0.020770801231265068,
"eval_runtime": 527.2581,
"eval_samples_per_second": 1.72,
"eval_steps_per_second": 0.055,
"eval_wer": 0.0897813614513878,
"step": 600
},
{
"epoch": 1.150278293135436,
"grad_norm": 0.4924355447292328,
"learning_rate": 8.934646374216652e-06,
"loss": 0.0068,
"step": 620
},
{
"epoch": 1.1873840445269017,
"grad_norm": 0.393308162689209,
"learning_rate": 8.755595344673232e-06,
"loss": 0.0102,
"step": 640
},
{
"epoch": 1.2244897959183674,
"grad_norm": 0.4669530391693115,
"learning_rate": 8.576544315129813e-06,
"loss": 0.0061,
"step": 660
},
{
"epoch": 1.261595547309833,
"grad_norm": 0.4678107500076294,
"learning_rate": 8.397493285586394e-06,
"loss": 0.0058,
"step": 680
},
{
"epoch": 1.2987012987012987,
"grad_norm": 0.2520297169685364,
"learning_rate": 8.218442256042973e-06,
"loss": 0.0059,
"step": 700
},
{
"epoch": 1.3358070500927643,
"grad_norm": 0.5356913208961487,
"learning_rate": 8.039391226499553e-06,
"loss": 0.0048,
"step": 720
},
{
"epoch": 1.37291280148423,
"grad_norm": 0.5626540184020996,
"learning_rate": 7.860340196956134e-06,
"loss": 0.0095,
"step": 740
},
{
"epoch": 1.4100185528756957,
"grad_norm": 0.42257481813430786,
"learning_rate": 7.681289167412713e-06,
"loss": 0.0062,
"step": 760
},
{
"epoch": 1.4471243042671613,
"grad_norm": 0.45789048075675964,
"learning_rate": 7.5022381378692935e-06,
"loss": 0.0068,
"step": 780
},
{
"epoch": 1.4842300556586272,
"grad_norm": 0.19930215179920197,
"learning_rate": 7.323187108325873e-06,
"loss": 0.0062,
"step": 800
},
{
"epoch": 1.4842300556586272,
"eval_loss": 0.017798766493797302,
"eval_runtime": 530.0679,
"eval_samples_per_second": 1.711,
"eval_steps_per_second": 0.055,
"eval_wer": 0.07869437122034424,
"step": 800
},
{
"epoch": 1.5213358070500926,
"grad_norm": 0.39932870864868164,
"learning_rate": 7.144136078782453e-06,
"loss": 0.0055,
"step": 820
},
{
"epoch": 1.5584415584415585,
"grad_norm": 0.44049975275993347,
"learning_rate": 6.9650850492390334e-06,
"loss": 0.0045,
"step": 840
},
{
"epoch": 1.595547309833024,
"grad_norm": 0.653985857963562,
"learning_rate": 6.7860340196956146e-06,
"loss": 0.0046,
"step": 860
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.32906222343444824,
"learning_rate": 6.606982990152194e-06,
"loss": 0.0038,
"step": 880
},
{
"epoch": 1.6697588126159555,
"grad_norm": 0.23523864150047302,
"learning_rate": 6.427931960608774e-06,
"loss": 0.0048,
"step": 900
},
{
"epoch": 1.7068645640074211,
"grad_norm": 0.6168527603149414,
"learning_rate": 6.2488809310653545e-06,
"loss": 0.0056,
"step": 920
},
{
"epoch": 1.7439703153988868,
"grad_norm": 0.3621886968612671,
"learning_rate": 6.069829901521934e-06,
"loss": 0.0043,
"step": 940
},
{
"epoch": 1.7810760667903525,
"grad_norm": 0.47206345200538635,
"learning_rate": 5.890778871978514e-06,
"loss": 0.0047,
"step": 960
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.3675802946090698,
"learning_rate": 5.7117278424350944e-06,
"loss": 0.0036,
"step": 980
},
{
"epoch": 1.8552875695732838,
"grad_norm": 0.2902631163597107,
"learning_rate": 5.532676812891674e-06,
"loss": 0.0053,
"step": 1000
},
{
"epoch": 1.8552875695732838,
"eval_loss": 0.016478832811117172,
"eval_runtime": 528.8884,
"eval_samples_per_second": 1.715,
"eval_steps_per_second": 0.055,
"eval_wer": 0.07528298961079237,
"step": 1000
},
{
"epoch": 1.8923933209647497,
"grad_norm": 0.25338953733444214,
"learning_rate": 5.353625783348255e-06,
"loss": 0.0038,
"step": 1020
},
{
"epoch": 1.929499072356215,
"grad_norm": 0.499541699886322,
"learning_rate": 5.174574753804835e-06,
"loss": 0.0032,
"step": 1040
},
{
"epoch": 1.966604823747681,
"grad_norm": 0.2459595799446106,
"learning_rate": 4.9955237242614155e-06,
"loss": 0.0034,
"step": 1060
},
{
"epoch": 2.0037105751391464,
"grad_norm": 0.17086371779441833,
"learning_rate": 4.816472694717995e-06,
"loss": 0.0034,
"step": 1080
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.33610305190086365,
"learning_rate": 4.637421665174575e-06,
"loss": 0.0022,
"step": 1100
},
{
"epoch": 2.0779220779220777,
"grad_norm": 0.1726388782262802,
"learning_rate": 4.4583706356311554e-06,
"loss": 0.0019,
"step": 1120
},
{
"epoch": 2.1150278293135436,
"grad_norm": 0.5672515630722046,
"learning_rate": 4.279319606087735e-06,
"loss": 0.0018,
"step": 1140
},
{
"epoch": 2.1521335807050095,
"grad_norm": 0.18765456974506378,
"learning_rate": 4.100268576544316e-06,
"loss": 0.0018,
"step": 1160
},
{
"epoch": 2.189239332096475,
"grad_norm": 0.6721988320350647,
"learning_rate": 3.921217547000895e-06,
"loss": 0.0018,
"step": 1180
},
{
"epoch": 2.226345083487941,
"grad_norm": 0.2910846471786499,
"learning_rate": 3.7421665174574756e-06,
"loss": 0.0015,
"step": 1200
},
{
"epoch": 2.226345083487941,
"eval_loss": 0.016317173838615417,
"eval_runtime": 528.0525,
"eval_samples_per_second": 1.718,
"eval_steps_per_second": 0.055,
"eval_wer": 0.0654365017832222,
"step": 1200
},
{
"epoch": 2.2634508348794062,
"grad_norm": 0.2232552319765091,
"learning_rate": 3.5631154879140555e-06,
"loss": 0.0014,
"step": 1220
},
{
"epoch": 2.300556586270872,
"grad_norm": 0.17460349202156067,
"learning_rate": 3.384064458370636e-06,
"loss": 0.001,
"step": 1240
},
{
"epoch": 2.3376623376623376,
"grad_norm": 0.11456964910030365,
"learning_rate": 3.205013428827216e-06,
"loss": 0.0013,
"step": 1260
},
{
"epoch": 2.3747680890538034,
"grad_norm": 0.06092933565378189,
"learning_rate": 3.0259623992837963e-06,
"loss": 0.0012,
"step": 1280
},
{
"epoch": 2.411873840445269,
"grad_norm": 0.13908065855503082,
"learning_rate": 2.846911369740376e-06,
"loss": 0.0014,
"step": 1300
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.0469084270298481,
"learning_rate": 2.667860340196957e-06,
"loss": 0.0017,
"step": 1320
},
{
"epoch": 2.4860853432282,
"grad_norm": 0.16005142033100128,
"learning_rate": 2.4888093106535366e-06,
"loss": 0.0013,
"step": 1340
},
{
"epoch": 2.523191094619666,
"grad_norm": 0.19354048371315002,
"learning_rate": 2.3097582811101165e-06,
"loss": 0.0013,
"step": 1360
},
{
"epoch": 2.5602968460111315,
"grad_norm": 0.06003783643245697,
"learning_rate": 2.1307072515666967e-06,
"loss": 0.0013,
"step": 1380
},
{
"epoch": 2.5974025974025974,
"grad_norm": 0.17973428964614868,
"learning_rate": 1.951656222023277e-06,
"loss": 0.001,
"step": 1400
},
{
"epoch": 2.5974025974025974,
"eval_loss": 0.015609463676810265,
"eval_runtime": 531.4747,
"eval_samples_per_second": 1.707,
"eval_steps_per_second": 0.055,
"eval_wer": 0.062412777174755775,
"step": 1400
},
{
"epoch": 2.6345083487940633,
"grad_norm": 0.30550289154052734,
"learning_rate": 1.7726051924798568e-06,
"loss": 0.0008,
"step": 1420
},
{
"epoch": 2.6716141001855287,
"grad_norm": 0.09037981182336807,
"learning_rate": 1.593554162936437e-06,
"loss": 0.0013,
"step": 1440
},
{
"epoch": 2.7087198515769946,
"grad_norm": 0.03478335589170456,
"learning_rate": 1.414503133393017e-06,
"loss": 0.0013,
"step": 1460
},
{
"epoch": 2.74582560296846,
"grad_norm": 0.15679460763931274,
"learning_rate": 1.2354521038495972e-06,
"loss": 0.0011,
"step": 1480
},
{
"epoch": 2.782931354359926,
"grad_norm": 0.23235130310058594,
"learning_rate": 1.0564010743061775e-06,
"loss": 0.0011,
"step": 1500
},
{
"epoch": 2.8200371057513913,
"grad_norm": 0.24431759119033813,
"learning_rate": 8.773500447627574e-07,
"loss": 0.0006,
"step": 1520
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.12186744064092636,
"learning_rate": 6.982990152193376e-07,
"loss": 0.0022,
"step": 1540
},
{
"epoch": 2.8942486085343226,
"grad_norm": 0.20521441102027893,
"learning_rate": 5.192479856759177e-07,
"loss": 0.0008,
"step": 1560
},
{
"epoch": 2.9313543599257885,
"grad_norm": 0.17911091446876526,
"learning_rate": 3.401969561324978e-07,
"loss": 0.0008,
"step": 1580
},
{
"epoch": 2.9684601113172544,
"grad_norm": 0.7775314450263977,
"learning_rate": 1.611459265890779e-07,
"loss": 0.001,
"step": 1600
},
{
"epoch": 2.9684601113172544,
"eval_loss": 0.015125514939427376,
"eval_runtime": 530.4329,
"eval_samples_per_second": 1.71,
"eval_steps_per_second": 0.055,
"eval_wer": 0.06357574817801209,
"step": 1600
}
],
"logging_steps": 20,
"max_steps": 1617,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.806754683846656e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}