{
  "best_global_step": 1400,
  "best_metric": 0.062412777174755775,
  "best_model_checkpoint": "/content/drive/MyDrive/ABA Projects/Speech-To-Text/models/Under9/KB_800_aug_time1x5_cc_sample/checkpoint-1400",
  "epoch": 3.0,
  "eval_steps": 200,
  "global_step": 1617,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03710575139146568,
      "grad_norm": 1.6889166831970215,
      "learning_rate": 3.8e-07,
      "loss": 0.0498,
      "step": 20
    },
    {
      "epoch": 0.07421150278293136,
      "grad_norm": 1.2946438789367676,
      "learning_rate": 7.8e-07,
      "loss": 0.0402,
      "step": 40
    },
    {
      "epoch": 0.11131725417439703,
      "grad_norm": 1.791599154472351,
      "learning_rate": 1.1800000000000001e-06,
      "loss": 0.0364,
      "step": 60
    },
    {
      "epoch": 0.14842300556586271,
      "grad_norm": 1.2460503578186035,
      "learning_rate": 1.5800000000000001e-06,
      "loss": 0.0308,
      "step": 80
    },
    {
      "epoch": 0.18552875695732837,
      "grad_norm": 0.5869728326797485,
      "learning_rate": 1.98e-06,
      "loss": 0.0251,
      "step": 100
    },
    {
      "epoch": 0.22263450834879406,
      "grad_norm": 0.9444546699523926,
      "learning_rate": 2.38e-06,
      "loss": 0.0253,
      "step": 120
    },
    {
      "epoch": 0.2597402597402597,
      "grad_norm": 1.2589209079742432,
      "learning_rate": 2.7800000000000005e-06,
      "loss": 0.021,
      "step": 140
    },
    {
      "epoch": 0.29684601113172543,
      "grad_norm": 0.8687691688537598,
      "learning_rate": 3.1800000000000005e-06,
      "loss": 0.0164,
      "step": 160
    },
    {
      "epoch": 0.3339517625231911,
      "grad_norm": 0.7628278136253357,
      "learning_rate": 3.58e-06,
      "loss": 0.0184,
      "step": 180
    },
    {
      "epoch": 0.37105751391465674,
      "grad_norm": 0.7672299742698669,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.016,
      "step": 200
    },
    {
      "epoch": 0.37105751391465674,
      "eval_loss": 0.0242678951472044,
      "eval_runtime": 531.9148,
      "eval_samples_per_second": 1.705,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.09970538067917506,
      "step": 200
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.6221576929092407,
      "learning_rate": 4.38e-06,
      "loss": 0.0166,
      "step": 220
    },
    {
      "epoch": 0.4452690166975881,
      "grad_norm": 0.6783678531646729,
      "learning_rate": 4.78e-06,
      "loss": 0.0158,
      "step": 240
    },
    {
      "epoch": 0.48237476808905383,
      "grad_norm": 1.1848996877670288,
      "learning_rate": 5.18e-06,
      "loss": 0.0133,
      "step": 260
    },
    {
      "epoch": 0.5194805194805194,
      "grad_norm": 0.6004874110221863,
      "learning_rate": 5.580000000000001e-06,
      "loss": 0.0142,
      "step": 280
    },
    {
      "epoch": 0.5565862708719852,
      "grad_norm": 0.8510653972625732,
      "learning_rate": 5.98e-06,
      "loss": 0.0159,
      "step": 300
    },
    {
      "epoch": 0.5936920222634509,
      "grad_norm": 0.74302738904953,
      "learning_rate": 6.380000000000001e-06,
      "loss": 0.0164,
      "step": 320
    },
    {
      "epoch": 0.6307977736549165,
      "grad_norm": 0.7362753748893738,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.0145,
      "step": 340
    },
    {
      "epoch": 0.6679035250463822,
      "grad_norm": 0.762110710144043,
      "learning_rate": 7.180000000000001e-06,
      "loss": 0.0122,
      "step": 360
    },
    {
      "epoch": 0.7050092764378478,
      "grad_norm": 1.0113365650177002,
      "learning_rate": 7.58e-06,
      "loss": 0.0124,
      "step": 380
    },
    {
      "epoch": 0.7421150278293135,
      "grad_norm": 1.097764253616333,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.0128,
      "step": 400
    },
    {
      "epoch": 0.7421150278293135,
      "eval_loss": 0.0212822575122118,
      "eval_runtime": 527.7384,
      "eval_samples_per_second": 1.719,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.08885098464878276,
      "step": 400
    },
    {
      "epoch": 0.7792207792207793,
      "grad_norm": 0.7167023420333862,
      "learning_rate": 8.380000000000001e-06,
      "loss": 0.0122,
      "step": 420
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 0.9013610482215881,
      "learning_rate": 8.78e-06,
      "loss": 0.014,
      "step": 440
    },
    {
      "epoch": 0.8534322820037106,
      "grad_norm": 0.8653533458709717,
      "learning_rate": 9.180000000000002e-06,
      "loss": 0.0154,
      "step": 460
    },
    {
      "epoch": 0.8905380333951762,
      "grad_norm": 0.557327151298523,
      "learning_rate": 9.58e-06,
      "loss": 0.0145,
      "step": 480
    },
    {
      "epoch": 0.9276437847866419,
      "grad_norm": 2.3209738731384277,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.0145,
      "step": 500
    },
    {
      "epoch": 0.9647495361781077,
      "grad_norm": 1.026639461517334,
      "learning_rate": 9.829901521933752e-06,
      "loss": 0.0123,
      "step": 520
    },
    {
      "epoch": 1.0018552875695732,
      "grad_norm": 0.7530286908149719,
      "learning_rate": 9.650850492390333e-06,
      "loss": 0.0173,
      "step": 540
    },
    {
      "epoch": 1.0389610389610389,
      "grad_norm": 0.634614884853363,
      "learning_rate": 9.471799462846912e-06,
      "loss": 0.0103,
      "step": 560
    },
    {
      "epoch": 1.0760667903525047,
      "grad_norm": 0.6744325160980225,
      "learning_rate": 9.292748433303492e-06,
      "loss": 0.0078,
      "step": 580
    },
    {
      "epoch": 1.1131725417439704,
      "grad_norm": 0.7800220847129822,
      "learning_rate": 9.113697403760073e-06,
      "loss": 0.0094,
      "step": 600
    },
    {
      "epoch": 1.1131725417439704,
      "eval_loss": 0.020770801231265068,
      "eval_runtime": 527.2581,
      "eval_samples_per_second": 1.72,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.0897813614513878,
      "step": 600
    },
    {
      "epoch": 1.150278293135436,
      "grad_norm": 0.4924355447292328,
      "learning_rate": 8.934646374216652e-06,
      "loss": 0.0068,
      "step": 620
    },
    {
      "epoch": 1.1873840445269017,
      "grad_norm": 0.393308162689209,
      "learning_rate": 8.755595344673232e-06,
      "loss": 0.0102,
      "step": 640
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 0.4669530391693115,
      "learning_rate": 8.576544315129813e-06,
      "loss": 0.0061,
      "step": 660
    },
    {
      "epoch": 1.261595547309833,
      "grad_norm": 0.4678107500076294,
      "learning_rate": 8.397493285586394e-06,
      "loss": 0.0058,
      "step": 680
    },
    {
      "epoch": 1.2987012987012987,
      "grad_norm": 0.2520297169685364,
      "learning_rate": 8.218442256042973e-06,
      "loss": 0.0059,
      "step": 700
    },
    {
      "epoch": 1.3358070500927643,
      "grad_norm": 0.5356913208961487,
      "learning_rate": 8.039391226499553e-06,
      "loss": 0.0048,
      "step": 720
    },
    {
      "epoch": 1.37291280148423,
      "grad_norm": 0.5626540184020996,
      "learning_rate": 7.860340196956134e-06,
      "loss": 0.0095,
      "step": 740
    },
    {
      "epoch": 1.4100185528756957,
      "grad_norm": 0.42257481813430786,
      "learning_rate": 7.681289167412713e-06,
      "loss": 0.0062,
      "step": 760
    },
    {
      "epoch": 1.4471243042671613,
      "grad_norm": 0.45789048075675964,
      "learning_rate": 7.5022381378692935e-06,
      "loss": 0.0068,
      "step": 780
    },
    {
      "epoch": 1.4842300556586272,
      "grad_norm": 0.19930215179920197,
      "learning_rate": 7.323187108325873e-06,
      "loss": 0.0062,
      "step": 800
    },
    {
      "epoch": 1.4842300556586272,
      "eval_loss": 0.017798766493797302,
      "eval_runtime": 530.0679,
      "eval_samples_per_second": 1.711,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.07869437122034424,
      "step": 800
    },
    {
      "epoch": 1.5213358070500926,
      "grad_norm": 0.39932870864868164,
      "learning_rate": 7.144136078782453e-06,
      "loss": 0.0055,
      "step": 820
    },
    {
      "epoch": 1.5584415584415585,
      "grad_norm": 0.44049975275993347,
      "learning_rate": 6.9650850492390334e-06,
      "loss": 0.0045,
      "step": 840
    },
    {
      "epoch": 1.595547309833024,
      "grad_norm": 0.653985857963562,
      "learning_rate": 6.7860340196956146e-06,
      "loss": 0.0046,
      "step": 860
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 0.32906222343444824,
      "learning_rate": 6.606982990152194e-06,
      "loss": 0.0038,
      "step": 880
    },
    {
      "epoch": 1.6697588126159555,
      "grad_norm": 0.23523864150047302,
      "learning_rate": 6.427931960608774e-06,
      "loss": 0.0048,
      "step": 900
    },
    {
      "epoch": 1.7068645640074211,
      "grad_norm": 0.6168527603149414,
      "learning_rate": 6.2488809310653545e-06,
      "loss": 0.0056,
      "step": 920
    },
    {
      "epoch": 1.7439703153988868,
      "grad_norm": 0.3621886968612671,
      "learning_rate": 6.069829901521934e-06,
      "loss": 0.0043,
      "step": 940
    },
    {
      "epoch": 1.7810760667903525,
      "grad_norm": 0.47206345200538635,
      "learning_rate": 5.890778871978514e-06,
      "loss": 0.0047,
      "step": 960
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.3675802946090698,
      "learning_rate": 5.7117278424350944e-06,
      "loss": 0.0036,
      "step": 980
    },
    {
      "epoch": 1.8552875695732838,
      "grad_norm": 0.2902631163597107,
      "learning_rate": 5.532676812891674e-06,
      "loss": 0.0053,
      "step": 1000
    },
    {
      "epoch": 1.8552875695732838,
      "eval_loss": 0.016478832811117172,
      "eval_runtime": 528.8884,
      "eval_samples_per_second": 1.715,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.07528298961079237,
      "step": 1000
    },
    {
      "epoch": 1.8923933209647497,
      "grad_norm": 0.25338953733444214,
      "learning_rate": 5.353625783348255e-06,
      "loss": 0.0038,
      "step": 1020
    },
    {
      "epoch": 1.929499072356215,
      "grad_norm": 0.499541699886322,
      "learning_rate": 5.174574753804835e-06,
      "loss": 0.0032,
      "step": 1040
    },
    {
      "epoch": 1.966604823747681,
      "grad_norm": 0.2459595799446106,
      "learning_rate": 4.9955237242614155e-06,
      "loss": 0.0034,
      "step": 1060
    },
    {
      "epoch": 2.0037105751391464,
      "grad_norm": 0.17086371779441833,
      "learning_rate": 4.816472694717995e-06,
      "loss": 0.0034,
      "step": 1080
    },
    {
      "epoch": 2.0408163265306123,
      "grad_norm": 0.33610305190086365,
      "learning_rate": 4.637421665174575e-06,
      "loss": 0.0022,
      "step": 1100
    },
    {
      "epoch": 2.0779220779220777,
      "grad_norm": 0.1726388782262802,
      "learning_rate": 4.4583706356311554e-06,
      "loss": 0.0019,
      "step": 1120
    },
    {
      "epoch": 2.1150278293135436,
      "grad_norm": 0.5672515630722046,
      "learning_rate": 4.279319606087735e-06,
      "loss": 0.0018,
      "step": 1140
    },
    {
      "epoch": 2.1521335807050095,
      "grad_norm": 0.18765456974506378,
      "learning_rate": 4.100268576544316e-06,
      "loss": 0.0018,
      "step": 1160
    },
    {
      "epoch": 2.189239332096475,
      "grad_norm": 0.6721988320350647,
      "learning_rate": 3.921217547000895e-06,
      "loss": 0.0018,
      "step": 1180
    },
    {
      "epoch": 2.226345083487941,
      "grad_norm": 0.2910846471786499,
      "learning_rate": 3.7421665174574756e-06,
      "loss": 0.0015,
      "step": 1200
    },
    {
      "epoch": 2.226345083487941,
      "eval_loss": 0.016317173838615417,
      "eval_runtime": 528.0525,
      "eval_samples_per_second": 1.718,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.0654365017832222,
      "step": 1200
    },
    {
      "epoch": 2.2634508348794062,
      "grad_norm": 0.2232552319765091,
      "learning_rate": 3.5631154879140555e-06,
      "loss": 0.0014,
      "step": 1220
    },
    {
      "epoch": 2.300556586270872,
      "grad_norm": 0.17460349202156067,
      "learning_rate": 3.384064458370636e-06,
      "loss": 0.001,
      "step": 1240
    },
    {
      "epoch": 2.3376623376623376,
      "grad_norm": 0.11456964910030365,
      "learning_rate": 3.205013428827216e-06,
      "loss": 0.0013,
      "step": 1260
    },
    {
      "epoch": 2.3747680890538034,
      "grad_norm": 0.06092933565378189,
      "learning_rate": 3.0259623992837963e-06,
      "loss": 0.0012,
      "step": 1280
    },
    {
      "epoch": 2.411873840445269,
      "grad_norm": 0.13908065855503082,
      "learning_rate": 2.846911369740376e-06,
      "loss": 0.0014,
      "step": 1300
    },
    {
      "epoch": 2.4489795918367347,
      "grad_norm": 0.0469084270298481,
      "learning_rate": 2.667860340196957e-06,
      "loss": 0.0017,
      "step": 1320
    },
    {
      "epoch": 2.4860853432282,
      "grad_norm": 0.16005142033100128,
      "learning_rate": 2.4888093106535366e-06,
      "loss": 0.0013,
      "step": 1340
    },
    {
      "epoch": 2.523191094619666,
      "grad_norm": 0.19354048371315002,
      "learning_rate": 2.3097582811101165e-06,
      "loss": 0.0013,
      "step": 1360
    },
    {
      "epoch": 2.5602968460111315,
      "grad_norm": 0.06003783643245697,
      "learning_rate": 2.1307072515666967e-06,
      "loss": 0.0013,
      "step": 1380
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 0.17973428964614868,
      "learning_rate": 1.951656222023277e-06,
      "loss": 0.001,
      "step": 1400
    },
    {
      "epoch": 2.5974025974025974,
      "eval_loss": 0.015609463676810265,
      "eval_runtime": 531.4747,
      "eval_samples_per_second": 1.707,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.062412777174755775,
      "step": 1400
    },
    {
      "epoch": 2.6345083487940633,
      "grad_norm": 0.30550289154052734,
      "learning_rate": 1.7726051924798568e-06,
      "loss": 0.0008,
      "step": 1420
    },
    {
      "epoch": 2.6716141001855287,
      "grad_norm": 0.09037981182336807,
      "learning_rate": 1.593554162936437e-06,
      "loss": 0.0013,
      "step": 1440
    },
    {
      "epoch": 2.7087198515769946,
      "grad_norm": 0.03478335589170456,
      "learning_rate": 1.414503133393017e-06,
      "loss": 0.0013,
      "step": 1460
    },
    {
      "epoch": 2.74582560296846,
      "grad_norm": 0.15679460763931274,
      "learning_rate": 1.2354521038495972e-06,
      "loss": 0.0011,
      "step": 1480
    },
    {
      "epoch": 2.782931354359926,
      "grad_norm": 0.23235130310058594,
      "learning_rate": 1.0564010743061775e-06,
      "loss": 0.0011,
      "step": 1500
    },
    {
      "epoch": 2.8200371057513913,
      "grad_norm": 0.24431759119033813,
      "learning_rate": 8.773500447627574e-07,
      "loss": 0.0006,
      "step": 1520
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.12186744064092636,
      "learning_rate": 6.982990152193376e-07,
      "loss": 0.0022,
      "step": 1540
    },
    {
      "epoch": 2.8942486085343226,
      "grad_norm": 0.20521441102027893,
      "learning_rate": 5.192479856759177e-07,
      "loss": 0.0008,
      "step": 1560
    },
    {
      "epoch": 2.9313543599257885,
      "grad_norm": 0.17911091446876526,
      "learning_rate": 3.401969561324978e-07,
      "loss": 0.0008,
      "step": 1580
    },
    {
      "epoch": 2.9684601113172544,
      "grad_norm": 0.7775314450263977,
      "learning_rate": 1.611459265890779e-07,
      "loss": 0.001,
      "step": 1600
    },
    {
      "epoch": 2.9684601113172544,
      "eval_loss": 0.015125514939427376,
      "eval_runtime": 530.4329,
      "eval_samples_per_second": 1.71,
      "eval_steps_per_second": 0.055,
      "eval_wer": 0.06357574817801209,
      "step": 1600
    }
  ],
  "logging_steps": 20,
  "max_steps": 1617,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.806754683846656e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}