{
  "best_metric": 22.172258734002074,
  "best_model_checkpoint": "results/whisper-base/maithili/checkpoint-56000",
  "epoch": 33.249370277078086,
  "eval_steps": 1000,
  "global_step": 66000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 22.37806510925293,
      "learning_rate": 4.6000000000000004e-07,
      "loss": 2.3423,
      "step": 25
    },
    {
      "epoch": 0.03,
      "grad_norm": 14.617953300476074,
      "learning_rate": 9.600000000000001e-07,
      "loss": 2.0051,
      "step": 50
    },
    {
      "epoch": 0.04,
      "grad_norm": 8.992124557495117,
      "learning_rate": 1.46e-06,
      "loss": 1.4891,
      "step": 75
    },
    {
      "epoch": 0.05,
      "grad_norm": 6.06942081451416,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 1.1894,
      "step": 100
    },
    {
      "epoch": 0.06,
      "grad_norm": 5.8592963218688965,
      "learning_rate": 2.46e-06,
      "loss": 0.9395,
      "step": 125
    },
    {
      "epoch": 0.08,
      "grad_norm": 4.838193893432617,
      "learning_rate": 2.96e-06,
      "loss": 0.8116,
      "step": 150
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.628206729888916,
      "learning_rate": 3.46e-06,
      "loss": 0.7077,
      "step": 175
    },
    {
      "epoch": 0.1,
      "grad_norm": 4.1971611976623535,
      "learning_rate": 3.96e-06,
      "loss": 0.654,
      "step": 200
    },
    {
      "epoch": 0.11,
      "grad_norm": 4.9267659187316895,
      "learning_rate": 4.4600000000000005e-06,
      "loss": 0.5982,
      "step": 225
    },
    {
      "epoch": 0.13,
      "grad_norm": 4.413062572479248,
      "learning_rate": 4.960000000000001e-06,
      "loss": 0.5504,
      "step": 250
    },
    {
      "epoch": 0.14,
      "grad_norm": 4.560253620147705,
      "learning_rate": 5.460000000000001e-06,
      "loss": 0.5413,
      "step": 275
    },
    {
      "epoch": 0.15,
      "grad_norm": 3.9538028240203857,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 0.5037,
      "step": 300
    },
    {
      "epoch": 0.16,
      "grad_norm": 3.8549587726593018,
      "learning_rate": 6.460000000000001e-06,
      "loss": 0.4715,
      "step": 325
    },
    {
      "epoch": 0.18,
      "grad_norm": 3.935253620147705,
      "learning_rate": 6.96e-06,
      "loss": 0.4453,
      "step": 350
    },
    {
      "epoch": 0.19,
      "grad_norm": 4.225427627563477,
      "learning_rate": 7.4600000000000006e-06,
      "loss": 0.4322,
      "step": 375
    },
    {
      "epoch": 0.2,
      "grad_norm": 4.021173477172852,
      "learning_rate": 7.960000000000002e-06,
      "loss": 0.4302,
      "step": 400
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.08791446685791,
      "learning_rate": 8.46e-06,
      "loss": 0.4109,
      "step": 425
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.559580326080322,
      "learning_rate": 8.96e-06,
      "loss": 0.391,
      "step": 450
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.0354790687561035,
      "learning_rate": 9.460000000000001e-06,
      "loss": 0.3832,
      "step": 475
    },
    {
      "epoch": 0.25,
      "grad_norm": 4.030752182006836,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.373,
      "step": 500
    },
    {
      "epoch": 0.26,
      "grad_norm": 4.1658034324646,
      "learning_rate": 9.997688442211056e-06,
      "loss": 0.3686,
      "step": 525
    },
    {
      "epoch": 0.28,
      "grad_norm": 4.24349308013916,
      "learning_rate": 9.995175879396986e-06,
      "loss": 0.3444,
      "step": 550
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.5939555168151855,
      "learning_rate": 9.992663316582915e-06,
      "loss": 0.3418,
      "step": 575
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.8061537742614746,
      "learning_rate": 9.990150753768844e-06,
      "loss": 0.3393,
      "step": 600
    },
    {
      "epoch": 0.31,
      "grad_norm": 4.4001922607421875,
      "learning_rate": 9.987638190954775e-06,
      "loss": 0.3248,
      "step": 625
    },
    {
      "epoch": 0.33,
      "grad_norm": 3.4623498916625977,
      "learning_rate": 9.985125628140705e-06,
      "loss": 0.3191,
      "step": 650
    },
    {
      "epoch": 0.34,
      "grad_norm": 3.7342793941497803,
      "learning_rate": 9.982613065326634e-06,
      "loss": 0.3123,
      "step": 675
    },
    {
      "epoch": 0.35,
      "grad_norm": 4.150409698486328,
      "learning_rate": 9.980100502512565e-06,
      "loss": 0.3102,
      "step": 700
    },
    {
      "epoch": 0.37,
      "grad_norm": 3.992783308029175,
      "learning_rate": 9.977587939698493e-06,
      "loss": 0.3089,
      "step": 725
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.3655505180358887,
      "learning_rate": 9.975075376884424e-06,
      "loss": 0.3045,
      "step": 750
    },
    {
      "epoch": 0.39,
      "grad_norm": 3.5667428970336914,
      "learning_rate": 9.972562814070353e-06,
      "loss": 0.2959,
      "step": 775
    },
    {
      "epoch": 0.4,
      "grad_norm": 4.166114807128906,
      "learning_rate": 9.970050251256282e-06,
      "loss": 0.2918,
      "step": 800
    },
    {
      "epoch": 0.42,
      "grad_norm": 3.6910595893859863,
      "learning_rate": 9.967537688442212e-06,
      "loss": 0.2931,
      "step": 825
    },
    {
      "epoch": 0.43,
      "grad_norm": 3.8466129302978516,
      "learning_rate": 9.965025125628141e-06,
      "loss": 0.2741,
      "step": 850
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.8253138065338135,
      "learning_rate": 9.96251256281407e-06,
      "loss": 0.2733,
      "step": 875
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.8832590579986572,
      "learning_rate": 9.960000000000001e-06,
      "loss": 0.2754,
      "step": 900
    },
    {
      "epoch": 0.47,
      "grad_norm": 3.8620283603668213,
      "learning_rate": 9.95748743718593e-06,
      "loss": 0.2747,
      "step": 925
    },
    {
      "epoch": 0.48,
      "grad_norm": 3.543933629989624,
      "learning_rate": 9.95497487437186e-06,
      "loss": 0.2651,
      "step": 950
    },
    {
      "epoch": 0.49,
      "grad_norm": 3.5218117237091064,
      "learning_rate": 9.952462311557791e-06,
      "loss": 0.2769,
      "step": 975
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.711573600769043,
      "learning_rate": 9.949949748743718e-06,
      "loss": 0.2647,
      "step": 1000
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.23160116374492645,
      "eval_runtime": 649.8053,
      "eval_samples_per_second": 2.168,
      "eval_steps_per_second": 2.168,
      "eval_wer": 35.17813905223106,
      "step": 1000
    },
    {
      "epoch": 0.52,
      "grad_norm": 3.524477005004883,
      "learning_rate": 9.94743718592965e-06,
      "loss": 0.2592,
      "step": 1025
    },
    {
      "epoch": 0.53,
      "grad_norm": 3.2609434127807617,
      "learning_rate": 9.944924623115579e-06,
      "loss": 0.2532,
      "step": 1050
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.6249492168426514,
      "learning_rate": 9.942412060301508e-06,
      "loss": 0.2562,
      "step": 1075
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.7930243015289307,
      "learning_rate": 9.93989949748744e-06,
      "loss": 0.2592,
      "step": 1100
    },
    {
      "epoch": 0.57,
      "grad_norm": 4.007348537445068,
      "learning_rate": 9.937386934673367e-06,
      "loss": 0.2517,
      "step": 1125
    },
    {
      "epoch": 0.58,
      "grad_norm": 3.8038876056671143,
      "learning_rate": 9.934874371859298e-06,
      "loss": 0.2409,
      "step": 1150
    },
    {
      "epoch": 0.59,
      "grad_norm": 3.6129648685455322,
      "learning_rate": 9.932361809045227e-06,
      "loss": 0.2468,
      "step": 1175
    },
    {
      "epoch": 0.6,
      "grad_norm": 3.1642048358917236,
      "learning_rate": 9.929849246231156e-06,
      "loss": 0.2483,
      "step": 1200
    },
    {
      "epoch": 0.62,
      "grad_norm": 3.557328939437866,
      "learning_rate": 9.927336683417086e-06,
      "loss": 0.2388,
      "step": 1225
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.287649154663086,
      "learning_rate": 9.924824120603017e-06,
      "loss": 0.2472,
      "step": 1250
    },
    {
      "epoch": 0.64,
      "grad_norm": 2.858637809753418,
      "learning_rate": 9.922311557788944e-06,
      "loss": 0.2377,
      "step": 1275
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.016263484954834,
      "learning_rate": 9.919798994974875e-06,
      "loss": 0.2376,
      "step": 1300
    },
    {
      "epoch": 0.67,
      "grad_norm": 3.3102822303771973,
      "learning_rate": 9.917286432160805e-06,
      "loss": 0.2345,
      "step": 1325
    },
    {
      "epoch": 0.68,
      "grad_norm": 3.1757044792175293,
      "learning_rate": 9.914773869346734e-06,
      "loss": 0.2315,
      "step": 1350
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.720780849456787,
      "learning_rate": 9.912261306532665e-06,
      "loss": 0.2362,
      "step": 1375
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.6007885932922363,
      "learning_rate": 9.909748743718593e-06,
      "loss": 0.2306,
      "step": 1400
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.1506540775299072,
      "learning_rate": 9.907236180904524e-06,
      "loss": 0.2266,
      "step": 1425
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.562501907348633,
      "learning_rate": 9.904723618090453e-06,
      "loss": 0.2244,
      "step": 1450
    },
    {
      "epoch": 0.74,
      "grad_norm": 3.256913900375366,
      "learning_rate": 9.902211055276382e-06,
      "loss": 0.2313,
      "step": 1475
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.431098222732544,
      "learning_rate": 9.899698492462312e-06,
      "loss": 0.2194,
      "step": 1500
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.5509750843048096,
      "learning_rate": 9.897185929648243e-06,
      "loss": 0.2253,
      "step": 1525
    },
    {
      "epoch": 0.78,
      "grad_norm": 3.666264295578003,
      "learning_rate": 9.894673366834172e-06,
      "loss": 0.2214,
      "step": 1550
    },
    {
      "epoch": 0.79,
      "grad_norm": 3.291407585144043,
      "learning_rate": 9.892160804020101e-06,
      "loss": 0.2139,
      "step": 1575
    },
    {
      "epoch": 0.81,
      "grad_norm": 3.3444149494171143,
      "learning_rate": 9.88964824120603e-06,
      "loss": 0.2172,
      "step": 1600
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.133206367492676,
      "learning_rate": 9.88713567839196e-06,
      "loss": 0.2199,
      "step": 1625
    },
    {
      "epoch": 0.83,
      "grad_norm": 3.0405352115631104,
      "learning_rate": 9.884623115577891e-06,
      "loss": 0.2103,
      "step": 1650
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.157695770263672,
      "learning_rate": 9.882110552763819e-06,
      "loss": 0.2155,
      "step": 1675
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.13808274269104,
      "learning_rate": 9.87959798994975e-06,
      "loss": 0.215,
      "step": 1700
    },
    {
      "epoch": 0.87,
      "grad_norm": 3.28182315826416,
      "learning_rate": 9.877085427135679e-06,
      "loss": 0.2139,
      "step": 1725
    },
    {
      "epoch": 0.88,
      "grad_norm": 3.1201610565185547,
      "learning_rate": 9.874572864321608e-06,
      "loss": 0.2018,
      "step": 1750
    },
    {
      "epoch": 0.89,
      "grad_norm": 3.426795721054077,
      "learning_rate": 9.87206030150754e-06,
      "loss": 0.2125,
      "step": 1775
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.0987160205841064,
      "learning_rate": 9.869547738693469e-06,
      "loss": 0.2079,
      "step": 1800
    },
    {
      "epoch": 0.92,
      "grad_norm": 3.6653778553009033,
      "learning_rate": 9.867035175879398e-06,
      "loss": 0.1991,
      "step": 1825
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.522376537322998,
      "learning_rate": 9.864522613065327e-06,
      "loss": 0.1966,
      "step": 1850
    },
    {
      "epoch": 0.94,
      "grad_norm": 3.2122714519500732,
      "learning_rate": 9.862010050251257e-06,
      "loss": 0.1962,
      "step": 1875
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.3362936973571777,
      "learning_rate": 9.859497487437186e-06,
      "loss": 0.2056,
      "step": 1900
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.8921637535095215,
      "learning_rate": 9.856984924623117e-06,
      "loss": 0.192,
      "step": 1925
    },
    {
      "epoch": 0.98,
      "grad_norm": 3.2778878211975098,
      "learning_rate": 9.854472361809046e-06,
      "loss": 0.1953,
      "step": 1950
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.4213788509368896,
      "learning_rate": 9.851959798994976e-06,
      "loss": 0.199,
      "step": 1975
    },
    {
      "epoch": 1.01,
      "grad_norm": 2.996750593185425,
      "learning_rate": 9.849447236180905e-06,
      "loss": 0.1877,
      "step": 2000
    },
    {
      "epoch": 1.01,
      "eval_loss": 0.18496885895729065,
      "eval_runtime": 642.3954,
      "eval_samples_per_second": 2.193,
      "eval_steps_per_second": 2.193,
      "eval_wer": 29.546869595295743,
      "step": 2000
    },
    {
      "epoch": 1.02,
      "grad_norm": 2.9950320720672607,
      "learning_rate": 9.846934673366834e-06,
      "loss": 0.1852,
      "step": 2025
    },
    {
      "epoch": 1.03,
      "grad_norm": 3.2103137969970703,
      "learning_rate": 9.844422110552765e-06,
      "loss": 0.1808,
      "step": 2050
    },
    {
      "epoch": 1.05,
      "grad_norm": 2.737065076828003,
      "learning_rate": 9.841909547738695e-06,
      "loss": 0.1712,
      "step": 2075
    },
    {
      "epoch": 1.06,
      "grad_norm": 3.0664756298065186,
      "learning_rate": 9.839396984924624e-06,
      "loss": 0.1744,
      "step": 2100
    },
    {
      "epoch": 1.07,
      "grad_norm": 3.143113374710083,
      "learning_rate": 9.836884422110553e-06,
      "loss": 0.1828,
      "step": 2125
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.1511785984039307,
      "learning_rate": 9.834371859296483e-06,
      "loss": 0.1724,
      "step": 2150
    },
    {
      "epoch": 1.1,
      "grad_norm": 3.206463575363159,
      "learning_rate": 9.831859296482414e-06,
      "loss": 0.1742,
      "step": 2175
    },
    {
      "epoch": 1.11,
      "grad_norm": 3.2733755111694336,
      "learning_rate": 9.829346733668343e-06,
      "loss": 0.178,
      "step": 2200
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.9530861377716064,
      "learning_rate": 9.826834170854272e-06,
      "loss": 0.1705,
      "step": 2225
    },
    {
      "epoch": 1.13,
      "grad_norm": 3.1909892559051514,
      "learning_rate": 9.824321608040202e-06,
      "loss": 0.1759,
      "step": 2250
    },
    {
      "epoch": 1.15,
      "grad_norm": 2.792212724685669,
      "learning_rate": 9.821809045226131e-06,
      "loss": 0.1641,
      "step": 2275
    },
    {
      "epoch": 1.16,
      "grad_norm": 3.1717071533203125,
      "learning_rate": 9.81929648241206e-06,
      "loss": 0.1709,
      "step": 2300
    },
    {
      "epoch": 1.17,
      "grad_norm": 2.979113817214966,
      "learning_rate": 9.816783919597991e-06,
      "loss": 0.1655,
      "step": 2325
    },
    {
      "epoch": 1.18,
      "grad_norm": 2.8337669372558594,
      "learning_rate": 9.81427135678392e-06,
      "loss": 0.1691,
      "step": 2350
    },
    {
      "epoch": 1.2,
      "grad_norm": 2.9283900260925293,
      "learning_rate": 9.81175879396985e-06,
      "loss": 0.171,
      "step": 2375
    },
    {
      "epoch": 1.21,
      "grad_norm": 3.450836420059204,
      "learning_rate": 9.809246231155781e-06,
      "loss": 0.1695,
      "step": 2400
    },
    {
      "epoch": 1.22,
      "grad_norm": 2.9222114086151123,
      "learning_rate": 9.806733668341709e-06,
      "loss": 0.1696,
      "step": 2425
    },
    {
      "epoch": 1.23,
      "grad_norm": 2.5073487758636475,
      "learning_rate": 9.80422110552764e-06,
      "loss": 0.1672,
      "step": 2450
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.157297372817993,
      "learning_rate": 9.801708542713569e-06,
      "loss": 0.165,
      "step": 2475
    },
    {
      "epoch": 1.26,
      "grad_norm": 3.0700457096099854,
      "learning_rate": 9.799195979899498e-06,
      "loss": 0.169,
      "step": 2500
    },
    {
      "epoch": 1.27,
      "grad_norm": 3.2579360008239746,
      "learning_rate": 9.796683417085428e-06,
      "loss": 0.1607,
      "step": 2525
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.8195056915283203,
      "learning_rate": 9.794170854271357e-06,
      "loss": 0.16,
      "step": 2550
    },
    {
      "epoch": 1.3,
      "grad_norm": 2.526740312576294,
      "learning_rate": 9.791658291457288e-06,
      "loss": 0.1678,
      "step": 2575
    },
    {
      "epoch": 1.31,
      "grad_norm": 3.2605721950531006,
      "learning_rate": 9.789145728643217e-06,
      "loss": 0.1597,
      "step": 2600
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.9491026401519775,
      "learning_rate": 9.786633165829147e-06,
      "loss": 0.1643,
      "step": 2625
    },
    {
      "epoch": 1.34,
      "grad_norm": 3.419473886489868,
      "learning_rate": 9.784120603015076e-06,
      "loss": 0.1631,
      "step": 2650
    },
    {
      "epoch": 1.35,
      "grad_norm": 3.4714505672454834,
      "learning_rate": 9.781608040201007e-06,
      "loss": 0.1637,
      "step": 2675
    },
    {
      "epoch": 1.36,
      "grad_norm": 2.919762372970581,
      "learning_rate": 9.779095477386934e-06,
      "loss": 0.1614,
      "step": 2700
    },
    {
      "epoch": 1.37,
      "grad_norm": 2.6376657485961914,
      "learning_rate": 9.776582914572866e-06,
      "loss": 0.1617,
      "step": 2725
    },
    {
      "epoch": 1.39,
      "grad_norm": 2.929567575454712,
      "learning_rate": 9.774070351758795e-06,
      "loss": 0.1595,
      "step": 2750
    },
    {
      "epoch": 1.4,
      "grad_norm": 2.939025402069092,
      "learning_rate": 9.771557788944724e-06,
      "loss": 0.1589,
      "step": 2775
    },
    {
      "epoch": 1.41,
      "grad_norm": 3.043203115463257,
      "learning_rate": 9.769045226130655e-06,
      "loss": 0.1623,
      "step": 2800
    },
    {
      "epoch": 1.42,
      "grad_norm": 2.872321605682373,
      "learning_rate": 9.766532663316583e-06,
      "loss": 0.1527,
      "step": 2825
    },
    {
      "epoch": 1.44,
      "grad_norm": 2.8521175384521484,
      "learning_rate": 9.764020100502514e-06,
      "loss": 0.1582,
      "step": 2850
    },
    {
      "epoch": 1.45,
      "grad_norm": 2.8888206481933594,
      "learning_rate": 9.761507537688443e-06,
      "loss": 0.162,
      "step": 2875
    },
    {
      "epoch": 1.46,
      "grad_norm": 2.583472490310669,
      "learning_rate": 9.758994974874372e-06,
      "loss": 0.1559,
      "step": 2900
    },
    {
      "epoch": 1.47,
      "grad_norm": 3.0001070499420166,
      "learning_rate": 9.756482412060302e-06,
      "loss": 0.1557,
      "step": 2925
    },
    {
      "epoch": 1.49,
      "grad_norm": 3.3756625652313232,
      "learning_rate": 9.753969849246233e-06,
      "loss": 0.1603,
      "step": 2950
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.995574951171875,
      "learning_rate": 9.75145728643216e-06,
      "loss": 0.1539,
      "step": 2975
    },
    {
      "epoch": 1.51,
      "grad_norm": 2.927722692489624,
      "learning_rate": 9.748944723618091e-06,
      "loss": 0.1579,
      "step": 3000
    },
    {
      "epoch": 1.51,
      "eval_loss": 0.16459061205387115,
      "eval_runtime": 645.71,
      "eval_samples_per_second": 2.182,
      "eval_steps_per_second": 2.182,
      "eval_wer": 26.675890695261156,
      "step": 3000
    },
    {
      "epoch": 1.52,
      "grad_norm": 3.3339335918426514,
      "learning_rate": 9.74643216080402e-06,
      "loss": 0.1559,
      "step": 3025
    },
    {
      "epoch": 1.54,
      "grad_norm": 3.0809624195098877,
      "learning_rate": 9.74391959798995e-06,
      "loss": 0.1561,
      "step": 3050
    },
    {
      "epoch": 1.55,
      "grad_norm": 2.9823570251464844,
      "learning_rate": 9.741407035175881e-06,
      "loss": 0.1599,
      "step": 3075
    },
    {
      "epoch": 1.56,
      "grad_norm": 2.7149410247802734,
      "learning_rate": 9.738894472361809e-06,
      "loss": 0.1469,
      "step": 3100
    },
    {
      "epoch": 1.57,
      "grad_norm": 3.8664979934692383,
      "learning_rate": 9.73638190954774e-06,
      "loss": 0.1545,
      "step": 3125
    },
    {
      "epoch": 1.59,
      "grad_norm": 2.9406516551971436,
      "learning_rate": 9.733869346733669e-06,
      "loss": 0.154,
      "step": 3150
    },
    {
      "epoch": 1.6,
      "grad_norm": 3.1379194259643555,
      "learning_rate": 9.731356783919598e-06,
      "loss": 0.1545,
      "step": 3175
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.072883129119873,
      "learning_rate": 9.72884422110553e-06,
      "loss": 0.1545,
      "step": 3200
    },
    {
      "epoch": 1.62,
      "grad_norm": 3.329160213470459,
      "learning_rate": 9.726331658291459e-06,
      "loss": 0.151,
      "step": 3225
    },
    {
      "epoch": 1.64,
      "grad_norm": 2.8267323970794678,
      "learning_rate": 9.723819095477388e-06,
      "loss": 0.1471,
      "step": 3250
    },
    {
      "epoch": 1.65,
      "grad_norm": 3.380469799041748,
      "learning_rate": 9.721306532663317e-06,
      "loss": 0.1563,
      "step": 3275
    },
    {
      "epoch": 1.66,
      "grad_norm": 2.6737258434295654,
      "learning_rate": 9.718793969849247e-06,
      "loss": 0.1505,
      "step": 3300
    },
    {
      "epoch": 1.68,
      "grad_norm": 2.920942783355713,
      "learning_rate": 9.716281407035176e-06,
      "loss": 0.1474,
      "step": 3325
    },
    {
      "epoch": 1.69,
      "grad_norm": 2.8349497318267822,
      "learning_rate": 9.713768844221107e-06,
      "loss": 0.1499,
      "step": 3350
    },
    {
      "epoch": 1.7,
      "grad_norm": 2.9991960525512695,
      "learning_rate": 9.711256281407035e-06,
      "loss": 0.1457,
      "step": 3375
    },
    {
      "epoch": 1.71,
      "grad_norm": 3.211735486984253,
      "learning_rate": 9.708743718592966e-06,
      "loss": 0.1488,
      "step": 3400
    },
    {
      "epoch": 1.73,
      "grad_norm": 2.8068394660949707,
      "learning_rate": 9.706231155778895e-06,
      "loss": 0.144,
      "step": 3425
    },
    {
      "epoch": 1.74,
      "grad_norm": 3.149648904800415,
      "learning_rate": 9.703718592964824e-06,
      "loss": 0.1476,
      "step": 3450
    },
    {
      "epoch": 1.75,
      "grad_norm": 2.5642662048339844,
      "learning_rate": 9.701206030150755e-06,
      "loss": 0.1452,
      "step": 3475
    },
    {
      "epoch": 1.76,
      "grad_norm": 3.3796427249908447,
      "learning_rate": 9.698693467336685e-06,
      "loss": 0.1498,
      "step": 3500
    },
    {
      "epoch": 1.78,
      "grad_norm": 2.480526924133301,
      "learning_rate": 9.696180904522614e-06,
      "loss": 0.1438,
      "step": 3525
    },
    {
      "epoch": 1.79,
      "grad_norm": 2.742117166519165,
      "learning_rate": 9.693668341708543e-06,
      "loss": 0.1454,
      "step": 3550
    },
    {
      "epoch": 1.8,
      "grad_norm": 3.248408555984497,
      "learning_rate": 9.691155778894473e-06,
      "loss": 0.1449,
      "step": 3575
    },
    {
      "epoch": 1.81,
      "grad_norm": 2.808727264404297,
      "learning_rate": 9.688643216080402e-06,
      "loss": 0.1417,
      "step": 3600
    },
    {
      "epoch": 1.83,
      "grad_norm": 2.612992525100708,
      "learning_rate": 9.686130653266333e-06,
      "loss": 0.1491,
      "step": 3625
    },
    {
      "epoch": 1.84,
      "grad_norm": 2.978003978729248,
      "learning_rate": 9.683618090452262e-06,
      "loss": 0.1454,
      "step": 3650
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.9820547103881836,
      "learning_rate": 9.681105527638192e-06,
      "loss": 0.1381,
      "step": 3675
    },
    {
      "epoch": 1.86,
      "grad_norm": 2.7569639682769775,
      "learning_rate": 9.678592964824121e-06,
      "loss": 0.1471,
      "step": 3700
    },
    {
      "epoch": 1.88,
      "grad_norm": 3.0688931941986084,
      "learning_rate": 9.67608040201005e-06,
      "loss": 0.1425,
      "step": 3725
    },
    {
      "epoch": 1.89,
      "grad_norm": 2.921603202819824,
      "learning_rate": 9.673567839195981e-06,
      "loss": 0.1472,
      "step": 3750
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.9307756423950195,
      "learning_rate": 9.67105527638191e-06,
      "loss": 0.1438,
      "step": 3775
    },
    {
      "epoch": 1.91,
      "grad_norm": 3.2151060104370117,
      "learning_rate": 9.66854271356784e-06,
      "loss": 0.1428,
      "step": 3800
    },
    {
      "epoch": 1.93,
      "grad_norm": 2.9010095596313477,
      "learning_rate": 9.666030150753771e-06,
      "loss": 0.1465,
      "step": 3825
    },
    {
      "epoch": 1.94,
      "grad_norm": 2.832845687866211,
      "learning_rate": 9.663517587939699e-06,
      "loss": 0.1429,
      "step": 3850
    },
    {
      "epoch": 1.95,
      "grad_norm": 2.6402933597564697,
      "learning_rate": 9.66100502512563e-06,
      "loss": 0.1342,
      "step": 3875
    },
    {
      "epoch": 1.96,
      "grad_norm": 2.6498653888702393,
      "learning_rate": 9.658492462311559e-06,
      "loss": 0.1402,
      "step": 3900
    },
    {
      "epoch": 1.98,
      "grad_norm": 2.972980260848999,
      "learning_rate": 9.655979899497488e-06,
      "loss": 0.1387,
      "step": 3925
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.229097843170166,
      "learning_rate": 9.653467336683418e-06,
      "loss": 0.1432,
      "step": 3950
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3604576587677,
      "learning_rate": 9.650954773869347e-06,
      "loss": 0.1356,
      "step": 3975
    },
    {
      "epoch": 2.02,
      "grad_norm": 2.725226402282715,
      "learning_rate": 9.648442211055276e-06,
      "loss": 0.1179,
      "step": 4000
    },
    {
      "epoch": 2.02,
      "eval_loss": 0.15386536717414856,
      "eval_runtime": 646.9871,
      "eval_samples_per_second": 2.178,
      "eval_steps_per_second": 2.178,
      "eval_wer": 25.105499827049467,
      "step": 4000
    },
    {
      "epoch": 2.03,
      "grad_norm": 3.045943260192871,
      "learning_rate": 9.645929648241207e-06,
      "loss": 0.1157,
      "step": 4025
    },
    {
      "epoch": 2.04,
      "grad_norm": 2.593027353286743,
      "learning_rate": 9.643417085427137e-06,
      "loss": 0.116,
      "step": 4050
    },
    {
      "epoch": 2.05,
      "grad_norm": 2.6555795669555664,
      "learning_rate": 9.640904522613066e-06,
      "loss": 0.1187,
      "step": 4075
    },
    {
      "epoch": 2.07,
      "grad_norm": 2.776094913482666,
      "learning_rate": 9.638391959798997e-06,
      "loss": 0.1155,
      "step": 4100
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.2772326469421387,
      "learning_rate": 9.635879396984925e-06,
      "loss": 0.1152,
      "step": 4125
    },
    {
      "epoch": 2.09,
      "grad_norm": 3.0243771076202393,
      "learning_rate": 9.633366834170856e-06,
      "loss": 0.1199,
      "step": 4150
    },
    {
      "epoch": 2.1,
      "grad_norm": 2.6247713565826416,
      "learning_rate": 9.630854271356785e-06,
      "loss": 0.117,
      "step": 4175
    },
    {
      "epoch": 2.12,
      "grad_norm": 2.80692720413208,
      "learning_rate": 9.628341708542714e-06,
      "loss": 0.1115,
      "step": 4200
    },
    {
      "epoch": 2.13,
      "grad_norm": 2.899824380874634,
      "learning_rate": 9.625829145728644e-06,
      "loss": 0.1155,
      "step": 4225
    },
    {
      "epoch": 2.14,
      "grad_norm": 2.8152291774749756,
      "learning_rate": 9.623316582914573e-06,
      "loss": 0.1129,
      "step": 4250
    },
    {
      "epoch": 2.15,
      "grad_norm": 2.659745931625366,
      "learning_rate": 9.620804020100504e-06,
      "loss": 0.1143,
      "step": 4275
    },
    {
      "epoch": 2.17,
      "grad_norm": 2.8360331058502197,
      "learning_rate": 9.618291457286433e-06,
      "loss": 0.1207,
      "step": 4300
    },
    {
      "epoch": 2.18,
      "grad_norm": 3.03417706489563,
      "learning_rate": 9.615778894472363e-06,
      "loss": 0.1245,
      "step": 4325
    },
    {
      "epoch": 2.19,
      "grad_norm": 2.5418951511383057,
      "learning_rate": 9.613266331658292e-06,
      "loss": 0.1174,
      "step": 4350
    },
    {
      "epoch": 2.2,
      "grad_norm": 2.902958631515503,
      "learning_rate": 9.610753768844223e-06,
      "loss": 0.1158,
      "step": 4375
    },
    {
      "epoch": 2.22,
      "grad_norm": 3.026547431945801,
      "learning_rate": 9.60824120603015e-06,
      "loss": 0.1104,
      "step": 4400
    },
    {
      "epoch": 2.23,
      "grad_norm": 2.8732357025146484,
      "learning_rate": 9.605728643216082e-06,
      "loss": 0.1163,
      "step": 4425
    },
    {
      "epoch": 2.24,
      "grad_norm": 2.730586528778076,
      "learning_rate": 9.60321608040201e-06,
      "loss": 0.1128,
      "step": 4450
    },
    {
      "epoch": 2.25,
      "grad_norm": 2.59441876411438,
      "learning_rate": 9.60070351758794e-06,
      "loss": 0.1205,
      "step": 4475
    },
    {
      "epoch": 2.27,
      "grad_norm": 2.5764012336730957,
      "learning_rate": 9.598190954773871e-06,
      "loss": 0.1198,
      "step": 4500
    },
    {
      "epoch": 2.28,
      "grad_norm": 2.6175150871276855,
      "learning_rate": 9.595678391959799e-06,
      "loss": 0.117,
      "step": 4525
    },
    {
      "epoch": 2.29,
      "grad_norm": 2.7798826694488525,
      "learning_rate": 9.59316582914573e-06,
      "loss": 0.1101,
      "step": 4550
    },
    {
      "epoch": 2.3,
      "grad_norm": 2.900200605392456,
      "learning_rate": 9.59065326633166e-06,
      "loss": 0.1102,
      "step": 4575
    },
    {
      "epoch": 2.32,
      "grad_norm": 2.6330630779266357,
      "learning_rate": 9.588140703517588e-06,
      "loss": 0.1164,
      "step": 4600
    },
    {
      "epoch": 2.33,
      "grad_norm": 3.1691510677337646,
      "learning_rate": 9.585628140703518e-06,
      "loss": 0.1127,
      "step": 4625
    },
    {
      "epoch": 2.34,
      "grad_norm": 2.9986257553100586,
      "learning_rate": 9.583115577889449e-06,
      "loss": 0.1118,
      "step": 4650
    },
    {
      "epoch": 2.36,
      "grad_norm": 2.9816648960113525,
      "learning_rate": 9.580603015075378e-06,
      "loss": 0.1124,
      "step": 4675
    },
    {
      "epoch": 2.37,
      "grad_norm": 2.7380642890930176,
      "learning_rate": 9.578090452261307e-06,
      "loss": 0.1101,
      "step": 4700
    },
    {
      "epoch": 2.38,
      "grad_norm": 2.506922721862793,
      "learning_rate": 9.575577889447237e-06,
      "loss": 0.1101,
      "step": 4725
    },
    {
      "epoch": 2.39,
      "grad_norm": 2.7849462032318115,
      "learning_rate": 9.573065326633166e-06,
      "loss": 0.1128,
      "step": 4750
    },
    {
      "epoch": 2.41,
      "grad_norm": 2.457066774368286,
      "learning_rate": 9.570552763819097e-06,
      "loss": 0.1139,
      "step": 4775
    },
    {
      "epoch": 2.42,
      "grad_norm": 2.9274463653564453,
      "learning_rate": 9.568040201005025e-06,
      "loss": 0.1099,
      "step": 4800
    },
    {
      "epoch": 2.43,
      "grad_norm": 2.54990291595459,
      "learning_rate": 9.565527638190956e-06,
      "loss": 0.1099,
      "step": 4825
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.0029959678649902,
      "learning_rate": 9.563015075376885e-06,
      "loss": 0.1132,
      "step": 4850
    },
    {
      "epoch": 2.46,
      "grad_norm": 2.9913110733032227,
      "learning_rate": 9.560502512562814e-06,
      "loss": 0.1134,
      "step": 4875
    },
    {
      "epoch": 2.47,
      "grad_norm": 2.846057653427124,
      "learning_rate": 9.557989949748745e-06,
      "loss": 0.112,
      "step": 4900
    },
    {
      "epoch": 2.48,
      "grad_norm": 2.566232681274414,
      "learning_rate": 9.555477386934675e-06,
      "loss": 0.1106,
      "step": 4925
    },
    {
      "epoch": 2.49,
      "grad_norm": 2.8931262493133545,
      "learning_rate": 9.552964824120604e-06,
      "loss": 0.1099,
      "step": 4950
    },
    {
      "epoch": 2.51,
      "grad_norm": 2.9756979942321777,
      "learning_rate": 9.550552763819096e-06,
      "loss": 0.1125,
      "step": 4975
    },
    {
      "epoch": 2.52,
      "grad_norm": 2.837172508239746,
      "learning_rate": 9.548040201005025e-06,
      "loss": 0.1083,
      "step": 5000
    },
    {
      "epoch": 2.52,
      "eval_loss": 0.1495467722415924,
      "eval_runtime": 644.1684,
      "eval_samples_per_second": 2.187,
| "eval_steps_per_second": 2.187, | |
| "eval_wer": 24.628156347284676, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 2.499260902404785, | |
| "learning_rate": 9.545527638190956e-06, | |
| "loss": 0.1115, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 2.9325485229492188, | |
| "learning_rate": 9.543015075376885e-06, | |
| "loss": 0.1087, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 2.4854938983917236, | |
| "learning_rate": 9.540502512562815e-06, | |
| "loss": 0.1046, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 3.0302836894989014, | |
| "learning_rate": 9.537989949748746e-06, | |
| "loss": 0.1105, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 2.6300787925720215, | |
| "learning_rate": 9.535477386934673e-06, | |
| "loss": 0.1093, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 3.281339168548584, | |
| "learning_rate": 9.532964824120604e-06, | |
| "loss": 0.1133, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 2.9684898853302, | |
| "learning_rate": 9.530452261306534e-06, | |
| "loss": 0.1072, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 2.7068192958831787, | |
| "learning_rate": 9.527939698492463e-06, | |
| "loss": 0.1108, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 2.7589058876037598, | |
| "learning_rate": 9.525427135678392e-06, | |
| "loss": 0.1094, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 2.988163709640503, | |
| "learning_rate": 9.522914572864322e-06, | |
| "loss": 0.1055, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 2.748220443725586, | |
| "learning_rate": 9.520402010050253e-06, | |
| "loss": 0.1028, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 2.696606397628784, | |
| "learning_rate": 9.517889447236182e-06, | |
| "loss": 0.1013, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 2.4986040592193604, | |
| "learning_rate": 9.515376884422111e-06, | |
| "loss": 0.1094, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 2.414533853530884, | |
| "learning_rate": 9.51286432160804e-06, | |
| "loss": 0.1085, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 2.5464062690734863, | |
| "learning_rate": 9.510351758793972e-06, | |
| "loss": 0.1029, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 3.065047264099121, | |
| "learning_rate": 9.5078391959799e-06, | |
| "loss": 0.1013, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 3.1086618900299072, | |
| "learning_rate": 9.50532663316583e-06, | |
| "loss": 0.1066, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 3.5446290969848633, | |
| "learning_rate": 9.50281407035176e-06, | |
| "loss": 0.1098, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 2.7708733081817627, | |
| "learning_rate": 9.500301507537689e-06, | |
| "loss": 0.109, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 2.786681652069092, | |
| "learning_rate": 9.49778894472362e-06, | |
| "loss": 0.1062, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 2.927002429962158, | |
| "learning_rate": 9.49527638190955e-06, | |
| "loss": 0.1053, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 2.6356334686279297, | |
| "learning_rate": 9.492763819095479e-06, | |
| "loss": 0.1098, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 2.5846285820007324, | |
| "learning_rate": 9.490251256281408e-06, | |
| "loss": 0.1026, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 3.0148589611053467, | |
| "learning_rate": 9.487738693467337e-06, | |
| "loss": 0.108, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 2.6467926502227783, | |
| "learning_rate": 9.485226130653267e-06, | |
| "loss": 0.1031, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 2.8061394691467285, | |
| "learning_rate": 9.482713567839198e-06, | |
| "loss": 0.1045, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 2.6581783294677734, | |
| "learning_rate": 9.480201005025125e-06, | |
| "loss": 0.105, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 2.814573049545288, | |
| "learning_rate": 9.477688442211056e-06, | |
| "loss": 0.107, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 2.7229998111724854, | |
| "learning_rate": 9.475175879396985e-06, | |
| "loss": 0.0983, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 2.648622989654541, | |
| "learning_rate": 9.472663316582915e-06, | |
| "loss": 0.108, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 2.546680212020874, | |
| "learning_rate": 9.470150753768846e-06, | |
| "loss": 0.106, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 2.91450834274292, | |
| "learning_rate": 9.467638190954775e-06, | |
| "loss": 0.1057, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 2.5046870708465576, | |
| "learning_rate": 9.465125628140704e-06, | |
| "loss": 0.1045, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 2.452519178390503, | |
| "learning_rate": 9.462613065326634e-06, | |
| "loss": 0.1026, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 2.6275572776794434, | |
| "learning_rate": 9.460100502512563e-06, | |
| "loss": 0.1016, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 2.612506628036499, | |
| "learning_rate": 9.457587939698494e-06, | |
| "loss": 0.1037, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 2.721682548522949, | |
| "learning_rate": 9.455075376884423e-06, | |
| "loss": 0.1022, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.6826882362365723, | |
| "learning_rate": 9.452562814070353e-06, | |
| "loss": 0.1005, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "grad_norm": 2.7688844203948975, | |
| "learning_rate": 9.450050251256282e-06, | |
| "loss": 0.0843, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "grad_norm": 2.4665892124176025, | |
| "learning_rate": 9.447537688442211e-06, | |
| "loss": 0.0804, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "eval_loss": 0.1464279592037201, | |
| "eval_runtime": 645.9237, | |
| "eval_samples_per_second": 2.181, | |
| "eval_steps_per_second": 2.181, | |
| "eval_wer": 24.03320650294016, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 1.9517433643341064, | |
| "learning_rate": 9.44502512562814e-06, | |
| "loss": 0.081, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "grad_norm": 2.6357505321502686, | |
| "learning_rate": 9.442512562814072e-06, | |
| "loss": 0.078, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "grad_norm": 2.9394261837005615, | |
| "learning_rate": 9.440000000000001e-06, | |
| "loss": 0.083, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "grad_norm": 2.944277048110962, | |
| "learning_rate": 9.43748743718593e-06, | |
| "loss": 0.079, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "grad_norm": 2.566026210784912, | |
| "learning_rate": 9.43497487437186e-06, | |
| "loss": 0.0831, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 2.319978713989258, | |
| "learning_rate": 9.432462311557789e-06, | |
| "loss": 0.0795, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "grad_norm": 2.8877954483032227, | |
| "learning_rate": 9.42994974874372e-06, | |
| "loss": 0.0785, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 2.5460472106933594, | |
| "learning_rate": 9.42743718592965e-06, | |
| "loss": 0.0801, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "grad_norm": 2.396923303604126, | |
| "learning_rate": 9.424924623115579e-06, | |
| "loss": 0.0826, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "grad_norm": 2.4849960803985596, | |
| "learning_rate": 9.422412060301508e-06, | |
| "loss": 0.081, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 2.4838786125183105, | |
| "learning_rate": 9.419899497487437e-06, | |
| "loss": 0.0838, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "grad_norm": 2.7214527130126953, | |
| "learning_rate": 9.417386934673367e-06, | |
| "loss": 0.082, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "grad_norm": 2.787931203842163, | |
| "learning_rate": 9.414874371859298e-06, | |
| "loss": 0.0817, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 2.625025987625122, | |
| "learning_rate": 9.412361809045227e-06, | |
| "loss": 0.0805, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "grad_norm": 2.490147113800049, | |
| "learning_rate": 9.409849246231156e-06, | |
| "loss": 0.0796, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "grad_norm": 2.4763355255126953, | |
| "learning_rate": 9.407336683417086e-06, | |
| "loss": 0.083, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 3.0030245780944824, | |
| "learning_rate": 9.404824120603015e-06, | |
| "loss": 0.0818, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 3.25, | |
| "grad_norm": 2.714149236679077, | |
| "learning_rate": 9.402311557788946e-06, | |
| "loss": 0.0819, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "grad_norm": 2.4112088680267334, | |
| "learning_rate": 9.399798994974875e-06, | |
| "loss": 0.0825, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "grad_norm": 2.633383274078369, | |
| "learning_rate": 9.397286432160805e-06, | |
| "loss": 0.0804, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "grad_norm": 2.265183687210083, | |
| "learning_rate": 9.394773869346736e-06, | |
| "loss": 0.0799, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 2.4261343479156494, | |
| "learning_rate": 9.392261306532663e-06, | |
| "loss": 0.0763, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "grad_norm": 2.6179676055908203, | |
| "learning_rate": 9.389748743718594e-06, | |
| "loss": 0.0796, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 2.648509979248047, | |
| "learning_rate": 9.387236180904524e-06, | |
| "loss": 0.0823, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "grad_norm": 2.4044175148010254, | |
| "learning_rate": 9.384723618090453e-06, | |
| "loss": 0.0791, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "grad_norm": 2.3800647258758545, | |
| "learning_rate": 9.382211055276382e-06, | |
| "loss": 0.0788, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 3.1068170070648193, | |
| "learning_rate": 9.379698492462312e-06, | |
| "loss": 0.0811, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "grad_norm": 2.5507326126098633, | |
| "learning_rate": 9.377185929648241e-06, | |
| "loss": 0.0792, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "grad_norm": 2.522341728210449, | |
| "learning_rate": 9.374673366834172e-06, | |
| "loss": 0.0784, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 2.739595890045166, | |
| "learning_rate": 9.372160804020101e-06, | |
| "loss": 0.0791, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "grad_norm": 2.401925802230835, | |
| "learning_rate": 9.36964824120603e-06, | |
| "loss": 0.082, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "grad_norm": 3.219940662384033, | |
| "learning_rate": 9.367135678391962e-06, | |
| "loss": 0.0817, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 3.2623674869537354, | |
| "learning_rate": 9.36462311557789e-06, | |
| "loss": 0.0791, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "grad_norm": 2.358572244644165, | |
| "learning_rate": 9.36211055276382e-06, | |
| "loss": 0.0755, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "grad_norm": 3.0506913661956787, | |
| "learning_rate": 9.35959798994975e-06, | |
| "loss": 0.0792, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 2.2486371994018555, | |
| "learning_rate": 9.357085427135679e-06, | |
| "loss": 0.0807, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "grad_norm": 2.8625311851501465, | |
| "learning_rate": 9.354572864321608e-06, | |
| "loss": 0.0787, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 2.4400510787963867, | |
| "learning_rate": 9.352060301507538e-06, | |
| "loss": 0.0804, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "grad_norm": 2.5003409385681152, | |
| "learning_rate": 9.349547738693469e-06, | |
| "loss": 0.0789, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "grad_norm": 2.5204198360443115, | |
| "learning_rate": 9.347035175879398e-06, | |
| "loss": 0.077, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "eval_loss": 0.15057513117790222, | |
| "eval_runtime": 646.2962, | |
| "eval_samples_per_second": 2.18, | |
| "eval_steps_per_second": 2.18, | |
| "eval_wer": 24.240747146316153, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "grad_norm": 2.7666544914245605, | |
| "learning_rate": 9.344522613065327e-06, | |
| "loss": 0.0779, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "grad_norm": 3.256955146789551, | |
| "learning_rate": 9.342010050251257e-06, | |
| "loss": 0.0799, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 2.829012155532837, | |
| "learning_rate": 9.339497487437188e-06, | |
| "loss": 0.0811, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "grad_norm": 2.6960537433624268, | |
| "learning_rate": 9.336984924623115e-06, | |
| "loss": 0.0787, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "grad_norm": 2.7486023902893066, | |
| "learning_rate": 9.334472361809046e-06, | |
| "loss": 0.0828, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 2.8527791500091553, | |
| "learning_rate": 9.331959798994976e-06, | |
| "loss": 0.0813, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "grad_norm": 2.6692473888397217, | |
| "learning_rate": 9.329447236180905e-06, | |
| "loss": 0.0768, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "grad_norm": 2.2904937267303467, | |
| "learning_rate": 9.326934673366836e-06, | |
| "loss": 0.0788, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 3.499237060546875, | |
| "learning_rate": 9.324422110552764e-06, | |
| "loss": 0.0764, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "grad_norm": 2.6315267086029053, | |
| "learning_rate": 9.321909547738695e-06, | |
| "loss": 0.0746, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "grad_norm": 3.006561040878296, | |
| "learning_rate": 9.319396984924624e-06, | |
| "loss": 0.076, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 2.663254976272583, | |
| "learning_rate": 9.316884422110553e-06, | |
| "loss": 0.0784, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "grad_norm": 2.6093807220458984, | |
| "learning_rate": 9.314371859296483e-06, | |
| "loss": 0.0768, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 2.7296223640441895, | |
| "learning_rate": 9.311859296482414e-06, | |
| "loss": 0.0761, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "grad_norm": 2.5256307125091553, | |
| "learning_rate": 9.309346733668343e-06, | |
| "loss": 0.0771, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "grad_norm": 2.707585573196411, | |
| "learning_rate": 9.306834170854272e-06, | |
| "loss": 0.0753, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "grad_norm": 3.1183390617370605, | |
| "learning_rate": 9.304321608040201e-06, | |
| "loss": 0.0794, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "grad_norm": 2.302847385406494, | |
| "learning_rate": 9.30180904522613e-06, | |
| "loss": 0.0772, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 3.77, | |
| "grad_norm": 2.5927348136901855, | |
| "learning_rate": 9.299296482412062e-06, | |
| "loss": 0.0749, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "grad_norm": 2.6165075302124023, | |
| "learning_rate": 9.296783919597991e-06, | |
| "loss": 0.0788, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "grad_norm": 2.674424171447754, | |
| "learning_rate": 9.29427135678392e-06, | |
| "loss": 0.0768, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 2.872770309448242, | |
| "learning_rate": 9.29175879396985e-06, | |
| "loss": 0.0811, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "grad_norm": 2.9125661849975586, | |
| "learning_rate": 9.289246231155779e-06, | |
| "loss": 0.0739, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "grad_norm": 2.61698317527771, | |
| "learning_rate": 9.28673366834171e-06, | |
| "loss": 0.0771, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 3.0637826919555664, | |
| "learning_rate": 9.28422110552764e-06, | |
| "loss": 0.0791, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "grad_norm": 2.3239142894744873, | |
| "learning_rate": 9.281708542713569e-06, | |
| "loss": 0.0773, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "grad_norm": 2.6094796657562256, | |
| "learning_rate": 9.279195979899498e-06, | |
| "loss": 0.0755, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 2.530613422393799, | |
| "learning_rate": 9.276683417085427e-06, | |
| "loss": 0.0749, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "grad_norm": 3.653653621673584, | |
| "learning_rate": 9.274170854271357e-06, | |
| "loss": 0.0743, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 2.6792755126953125, | |
| "learning_rate": 9.271658291457288e-06, | |
| "loss": 0.0763, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 2.9613704681396484, | |
| "learning_rate": 9.269145728643217e-06, | |
| "loss": 0.0723, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "grad_norm": 2.2027602195739746, | |
| "learning_rate": 9.266633165829146e-06, | |
| "loss": 0.0789, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "grad_norm": 2.569223165512085, | |
| "learning_rate": 9.264120603015076e-06, | |
| "loss": 0.072, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "grad_norm": 2.3976686000823975, | |
| "learning_rate": 9.261608040201005e-06, | |
| "loss": 0.0737, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "grad_norm": 2.5629305839538574, | |
| "learning_rate": 9.259095477386936e-06, | |
| "loss": 0.0762, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "grad_norm": 2.397019147872925, | |
| "learning_rate": 9.256582914572865e-06, | |
| "loss": 0.0758, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "grad_norm": 2.764029026031494, | |
| "learning_rate": 9.254070351758795e-06, | |
| "loss": 0.0723, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "grad_norm": 2.1665878295898438, | |
| "learning_rate": 9.251557788944724e-06, | |
| "loss": 0.0654, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "grad_norm": 2.1100118160247803, | |
| "learning_rate": 9.249045226130653e-06, | |
| "loss": 0.0557, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "grad_norm": 2.4302258491516113, | |
| "learning_rate": 9.246532663316584e-06, | |
| "loss": 0.0539, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "eval_loss": 0.15238162875175476, | |
| "eval_runtime": 644.4842, | |
| "eval_samples_per_second": 2.186, | |
| "eval_steps_per_second": 2.186, | |
| "eval_wer": 23.85333794534763, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 2.350156545639038, | |
| "learning_rate": 9.244020100502514e-06, | |
| "loss": 0.0541, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "grad_norm": 2.4464669227600098, | |
| "learning_rate": 9.241507537688443e-06, | |
| "loss": 0.0538, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "grad_norm": 2.123314619064331, | |
| "learning_rate": 9.238994974874372e-06, | |
| "loss": 0.0545, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 2.584456443786621, | |
| "learning_rate": 9.236482412060302e-06, | |
| "loss": 0.0563, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "grad_norm": 2.46744704246521, | |
| "learning_rate": 9.233969849246231e-06, | |
| "loss": 0.0548, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "grad_norm": 2.7734973430633545, | |
| "learning_rate": 9.231457286432162e-06, | |
| "loss": 0.0593, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 2.5305910110473633, | |
| "learning_rate": 9.228944723618091e-06, | |
| "loss": 0.058, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "grad_norm": 2.668431043624878, | |
| "learning_rate": 9.22643216080402e-06, | |
| "loss": 0.0559, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "grad_norm": 2.23030161857605, | |
| "learning_rate": 9.223919597989952e-06, | |
| "loss": 0.0553, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 2.2469186782836914, | |
| "learning_rate": 9.22140703517588e-06, | |
| "loss": 0.0546, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "grad_norm": 2.3184828758239746, | |
| "learning_rate": 9.21889447236181e-06, | |
| "loss": 0.0551, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "grad_norm": 2.3341612815856934, | |
| "learning_rate": 9.21638190954774e-06, | |
| "loss": 0.0567, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "grad_norm": 2.4817066192626953, | |
| "learning_rate": 9.213869346733669e-06, | |
| "loss": 0.0531, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "grad_norm": 2.299858808517456, | |
| "learning_rate": 9.211356783919598e-06, | |
| "loss": 0.0545, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "grad_norm": 2.6612911224365234, | |
| "learning_rate": 9.208844221105528e-06, | |
| "loss": 0.0546, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "grad_norm": 2.7073473930358887, | |
| "learning_rate": 9.206331658291459e-06, | |
| "loss": 0.0551, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 2.435814142227173, | |
| "learning_rate": 9.203819095477388e-06, | |
| "loss": 0.0538, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "grad_norm": 2.920555353164673, | |
| "learning_rate": 9.201306532663317e-06, | |
| "loss": 0.0581, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "grad_norm": 2.4426980018615723, | |
| "learning_rate": 9.198793969849247e-06, | |
| "loss": 0.0527, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 2.282799243927002, | |
| "learning_rate": 9.196281407035178e-06, | |
| "loss": 0.0539, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "grad_norm": 2.3802311420440674, | |
| "learning_rate": 9.193768844221105e-06, | |
| "loss": 0.0544, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "grad_norm": 2.6062004566192627, | |
| "learning_rate": 9.191256281407036e-06, | |
| "loss": 0.0539, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 2.3153014183044434, | |
| "learning_rate": 9.188743718592966e-06, | |
| "loss": 0.0576, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "grad_norm": 2.6936705112457275, | |
| "learning_rate": 9.186231155778895e-06, | |
| "loss": 0.0518, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "grad_norm": 2.5648863315582275, | |
| "learning_rate": 9.183718592964826e-06, | |
| "loss": 0.0585, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 2.5685312747955322, | |
| "learning_rate": 9.181206030150754e-06, | |
| "loss": 0.057, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "grad_norm": 2.8490381240844727, | |
| "learning_rate": 9.178693467336685e-06, | |
| "loss": 0.0543, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "grad_norm": 2.2152018547058105, | |
| "learning_rate": 9.176180904522614e-06, | |
| "loss": 0.0563, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 2.69919490814209, | |
| "learning_rate": 9.173668341708543e-06, | |
| "loss": 0.0553, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "grad_norm": 2.7225608825683594, | |
| "learning_rate": 9.171155778894473e-06, | |
| "loss": 0.0575, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "grad_norm": 2.559675455093384, | |
| "learning_rate": 9.168643216080404e-06, | |
| "loss": 0.0553, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "grad_norm": 2.781768798828125, | |
| "learning_rate": 9.166130653266331e-06, | |
| "loss": 0.0578, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 4.45, | |
| "grad_norm": 2.8981781005859375, | |
| "learning_rate": 9.163618090452262e-06, | |
| "loss": 0.0542, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "grad_norm": 2.6946628093719482, | |
| "learning_rate": 9.161105527638192e-06, | |
| "loss": 0.059, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "grad_norm": 2.129403591156006, | |
| "learning_rate": 9.158592964824121e-06, | |
| "loss": 0.0509, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 3.163231372833252, | |
| "learning_rate": 9.156080402010052e-06, | |
| "loss": 0.0549, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 2.8405816555023193, | |
| "learning_rate": 9.15356783919598e-06, | |
| "loss": 0.0563, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "grad_norm": 2.1925594806671143, | |
| "learning_rate": 9.15105527638191e-06, | |
| "loss": 0.0553, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 2.4375970363616943, | |
| "learning_rate": 9.14854271356784e-06, | |
| "loss": 0.0584, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "grad_norm": 2.4587666988372803, | |
| "learning_rate": 9.14603015075377e-06, | |
| "loss": 0.0553, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "eval_loss": 0.1637195497751236, | |
| "eval_runtime": 651.0456, | |
| "eval_samples_per_second": 2.164, | |
| "eval_steps_per_second": 2.164, | |
| "eval_wer": 24.344517468004153, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "grad_norm": 2.439188003540039, | |
| "learning_rate": 9.1435175879397e-06, | |
| "loss": 0.0542, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.141105527638192e-06, | |
| "loss": 0.0547, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "grad_norm": 2.4117727279663086, | |
| "learning_rate": 9.138592964824121e-06, | |
| "loss": 0.0526, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "grad_norm": 2.603896379470825, | |
| "learning_rate": 9.136080402010052e-06, | |
| "loss": 0.0527, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 2.533517360687256, | |
| "learning_rate": 9.13356783919598e-06, | |
| "loss": 0.0566, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "grad_norm": 2.7762629985809326, | |
| "learning_rate": 9.13105527638191e-06, | |
| "loss": 0.0547, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "grad_norm": 2.8527615070343018, | |
| "learning_rate": 9.12854271356784e-06, | |
| "loss": 0.0558, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "grad_norm": 2.600090503692627, | |
| "learning_rate": 9.12603015075377e-06, | |
| "loss": 0.0503, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "grad_norm": 2.3100574016571045, | |
| "learning_rate": 9.123618090452263e-06, | |
| "loss": 0.0534, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "grad_norm": 2.8445324897766113, | |
| "learning_rate": 9.121105527638192e-06, | |
| "loss": 0.0538, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "grad_norm": 3.1814417839050293, | |
| "learning_rate": 9.118592964824121e-06, | |
| "loss": 0.0546, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "grad_norm": 2.835566759109497, | |
| "learning_rate": 9.11608040201005e-06, | |
| "loss": 0.0539, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 2.6004786491394043, | |
| "learning_rate": 9.11356783919598e-06, | |
| "loss": 0.0497, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "grad_norm": 2.2775909900665283, | |
| "learning_rate": 9.111055276381911e-06, | |
| "loss": 0.053, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 2.242342233657837, | |
| "learning_rate": 9.10854271356784e-06, | |
| "loss": 0.0492, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "grad_norm": 2.781096935272217, | |
| "learning_rate": 9.10603015075377e-06, | |
| "loss": 0.0559, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "grad_norm": 2.561607837677002, | |
| "learning_rate": 9.1035175879397e-06, | |
| "loss": 0.0537, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 3.002260208129883, | |
| "learning_rate": 9.101005025125628e-06, | |
| "loss": 0.0564, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "grad_norm": 2.7912750244140625, | |
| "learning_rate": 9.09849246231156e-06, | |
| "loss": 0.0542, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "grad_norm": 2.549391508102417, | |
| "learning_rate": 9.095979899497489e-06, | |
| "loss": 0.0529, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 2.707965612411499, | |
| "learning_rate": 9.093467336683418e-06, | |
| "loss": 0.0526, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "grad_norm": 2.5057213306427, | |
| "learning_rate": 9.090954773869347e-06, | |
| "loss": 0.0513, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "grad_norm": 3.1057217121124268, | |
| "learning_rate": 9.088442211055277e-06, | |
| "loss": 0.0515, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 2.3811659812927246, | |
| "learning_rate": 9.085929648241206e-06, | |
| "loss": 0.0518, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "grad_norm": 2.412745714187622, | |
| "learning_rate": 9.083417085427137e-06, | |
| "loss": 0.053, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "grad_norm": 3.048144578933716, | |
| "learning_rate": 9.080904522613066e-06, | |
| "loss": 0.053, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "grad_norm": 2.3766226768493652, | |
| "learning_rate": 9.078391959798996e-06, | |
| "loss": 0.0526, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "grad_norm": 2.6831417083740234, | |
| "learning_rate": 9.075879396984927e-06, | |
| "loss": 0.0525, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 2.7392430305480957, | |
| "learning_rate": 9.073366834170854e-06, | |
| "loss": 0.0548, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "grad_norm": 2.7497315406799316, | |
| "learning_rate": 9.070854271356785e-06, | |
| "loss": 0.0528, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 2.9299416542053223, | |
| "learning_rate": 9.068341708542715e-06, | |
| "loss": 0.0542, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "grad_norm": 2.889383316040039, | |
| "learning_rate": 9.065829145728644e-06, | |
| "loss": 0.0527, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "grad_norm": 2.648606777191162, | |
| "learning_rate": 9.063316582914573e-06, | |
| "loss": 0.0513, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 2.225612163543701, | |
| "learning_rate": 9.060804020100502e-06, | |
| "loss": 0.0526, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "grad_norm": 2.932143211364746, | |
| "learning_rate": 9.058291457286433e-06, | |
| "loss": 0.051, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "grad_norm": 2.607672691345215, | |
| "learning_rate": 9.055778894472363e-06, | |
| "loss": 0.053, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.8472115993499756, | |
| "learning_rate": 9.053266331658292e-06, | |
| "loss": 0.0557, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "grad_norm": 2.171074151992798, | |
| "learning_rate": 9.050753768844221e-06, | |
| "loss": 0.0356, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "grad_norm": 1.950042486190796, | |
| "learning_rate": 9.048241206030152e-06, | |
| "loss": 0.0381, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 1.7547545433044434, | |
| "learning_rate": 9.04572864321608e-06, | |
| "loss": 0.036, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "eval_loss": 0.16767631471157074, | |
| "eval_runtime": 646.7265, | |
| "eval_samples_per_second": 2.179, | |
| "eval_steps_per_second": 2.179, | |
| "eval_wer": 23.65963334486337, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "grad_norm": 1.8145438432693481, | |
| "learning_rate": 9.043216080402011e-06, | |
| "loss": 0.0339, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "grad_norm": 2.891932725906372, | |
| "learning_rate": 9.04070351758794e-06, | |
| "loss": 0.0351, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 1.8098477125167847, | |
| "learning_rate": 9.03819095477387e-06, | |
| "loss": 0.0362, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 5.09, | |
| "grad_norm": 2.386594772338867, | |
| "learning_rate": 9.0356783919598e-06, | |
| "loss": 0.0373, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "grad_norm": 2.028424024581909, | |
| "learning_rate": 9.033165829145728e-06, | |
| "loss": 0.0367, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "grad_norm": 2.1175694465637207, | |
| "learning_rate": 9.03065326633166e-06, | |
| "loss": 0.0386, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "grad_norm": 1.9647341966629028, | |
| "learning_rate": 9.028140703517589e-06, | |
| "loss": 0.0347, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "grad_norm": 2.252744197845459, | |
| "learning_rate": 9.025628140703518e-06, | |
| "loss": 0.0354, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "grad_norm": 2.485556125640869, | |
| "learning_rate": 9.023115577889447e-06, | |
| "loss": 0.0358, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 2.0512161254882812, | |
| "learning_rate": 9.020603015075378e-06, | |
| "loss": 0.0365, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "grad_norm": 2.402486801147461, | |
| "learning_rate": 9.018090452261308e-06, | |
| "loss": 0.0337, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "grad_norm": 2.146170139312744, | |
| "learning_rate": 9.015577889447237e-06, | |
| "loss": 0.035, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 2.5019354820251465, | |
| "learning_rate": 9.013065326633166e-06, | |
| "loss": 0.0378, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "grad_norm": 2.4359068870544434, | |
| "learning_rate": 9.010552763819096e-06, | |
| "loss": 0.0366, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "grad_norm": 2.1449923515319824, | |
| "learning_rate": 9.008040201005027e-06, | |
| "loss": 0.0364, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 2.202234983444214, | |
| "learning_rate": 9.005527638190954e-06, | |
| "loss": 0.0378, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "grad_norm": 2.267660140991211, | |
| "learning_rate": 9.003015075376885e-06, | |
| "loss": 0.0358, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "grad_norm": 2.0055835247039795, | |
| "learning_rate": 9.000502512562815e-06, | |
| "loss": 0.0368, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 1.9618968963623047, | |
| "learning_rate": 8.997989949748744e-06, | |
| "loss": 0.0351, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "grad_norm": 2.122114896774292, | |
| "learning_rate": 8.995477386934675e-06, | |
| "loss": 0.0356, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "grad_norm": 2.236201047897339, | |
| "learning_rate": 8.992964824120604e-06, | |
| "loss": 0.037, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "grad_norm": 2.286752939224243, | |
| "learning_rate": 8.990452261306534e-06, | |
| "loss": 0.0353, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "grad_norm": 2.0843496322631836, | |
| "learning_rate": 8.987939698492463e-06, | |
| "loss": 0.0343, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "grad_norm": 3.129362106323242, | |
| "learning_rate": 8.985427135678392e-06, | |
| "loss": 0.0376, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "grad_norm": 2.6233270168304443, | |
| "learning_rate": 8.982914572864322e-06, | |
| "loss": 0.0338, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 5.37, | |
| "grad_norm": 2.1038076877593994, | |
| "learning_rate": 8.980402010050253e-06, | |
| "loss": 0.0367, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "grad_norm": 2.270951509475708, | |
| "learning_rate": 8.977889447236182e-06, | |
| "loss": 0.0371, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "grad_norm": 2.929248094558716, | |
| "learning_rate": 8.975376884422111e-06, | |
| "loss": 0.0375, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 1.823087453842163, | |
| "learning_rate": 8.97286432160804e-06, | |
| "loss": 0.0357, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "grad_norm": 2.542175054550171, | |
| "learning_rate": 8.97035175879397e-06, | |
| "loss": 0.0355, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "grad_norm": 2.396777391433716, | |
| "learning_rate": 8.967839195979901e-06, | |
| "loss": 0.0369, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 2.5692787170410156, | |
| "learning_rate": 8.96532663316583e-06, | |
| "loss": 0.0369, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 5.45, | |
| "grad_norm": 2.5763325691223145, | |
| "learning_rate": 8.96281407035176e-06, | |
| "loss": 0.0356, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "grad_norm": 1.9143195152282715, | |
| "learning_rate": 8.960301507537689e-06, | |
| "loss": 0.0361, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 2.463517904281616, | |
| "learning_rate": 8.957788944723618e-06, | |
| "loss": 0.0347, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 5.49, | |
| "grad_norm": 2.0476324558258057, | |
| "learning_rate": 8.95527638190955e-06, | |
| "loss": 0.0389, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 3.286231517791748, | |
| "learning_rate": 8.952763819095479e-06, | |
| "loss": 0.0353, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 2.1984260082244873, | |
| "learning_rate": 8.950251256281408e-06, | |
| "loss": 0.0366, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "grad_norm": 2.0697944164276123, | |
| "learning_rate": 8.947738693467337e-06, | |
| "loss": 0.0356, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "grad_norm": 2.1701056957244873, | |
| "learning_rate": 8.945226130653267e-06, | |
| "loss": 0.0349, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "eval_loss": 0.17827929556369781, | |
| "eval_runtime": 649.3211, | |
| "eval_samples_per_second": 2.17, | |
| "eval_steps_per_second": 2.17, | |
| "eval_wer": 24.047042545831893, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "grad_norm": 2.3994946479797363, | |
| "learning_rate": 8.942713567839196e-06, | |
| "loss": 0.0345, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 5.57, | |
| "grad_norm": 2.7759196758270264, | |
| "learning_rate": 8.940201005025127e-06, | |
| "loss": 0.0357, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "grad_norm": 2.57523775100708, | |
| "learning_rate": 8.937688442211056e-06, | |
| "loss": 0.0327, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "grad_norm": 2.1448755264282227, | |
| "learning_rate": 8.935175879396986e-06, | |
| "loss": 0.0372, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 2.378547191619873, | |
| "learning_rate": 8.932663316582915e-06, | |
| "loss": 0.0357, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "grad_norm": 2.524625539779663, | |
| "learning_rate": 8.930150753768844e-06, | |
| "loss": 0.0366, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "grad_norm": 2.485322952270508, | |
| "learning_rate": 8.927638190954775e-06, | |
| "loss": 0.0347, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 2.4604809284210205, | |
| "learning_rate": 8.925125628140705e-06, | |
| "loss": 0.0356, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "grad_norm": 2.805788516998291, | |
| "learning_rate": 8.922613065326634e-06, | |
| "loss": 0.0378, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "grad_norm": 2.620722770690918, | |
| "learning_rate": 8.920100502512563e-06, | |
| "loss": 0.0352, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 2.9701807498931885, | |
| "learning_rate": 8.917587939698493e-06, | |
| "loss": 0.036, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "grad_norm": 2.5234711170196533, | |
| "learning_rate": 8.915075376884424e-06, | |
| "loss": 0.0392, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "grad_norm": 2.4073734283447266, | |
| "learning_rate": 8.912562814070353e-06, | |
| "loss": 0.0373, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 2.699392318725586, | |
| "learning_rate": 8.910050251256282e-06, | |
| "loss": 0.0377, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "grad_norm": 2.1058201789855957, | |
| "learning_rate": 8.907537688442212e-06, | |
| "loss": 0.0353, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "grad_norm": 2.494295597076416, | |
| "learning_rate": 8.905025125628143e-06, | |
| "loss": 0.0365, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 2.452155828475952, | |
| "learning_rate": 8.90251256281407e-06, | |
| "loss": 0.0356, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "grad_norm": 2.7919886112213135, | |
| "learning_rate": 8.900000000000001e-06, | |
| "loss": 0.035, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "grad_norm": 2.2973413467407227, | |
| "learning_rate": 8.89748743718593e-06, | |
| "loss": 0.034, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "grad_norm": 2.4735491275787354, | |
| "learning_rate": 8.89497487437186e-06, | |
| "loss": 0.0346, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "grad_norm": 2.2433793544769287, | |
| "learning_rate": 8.892462311557791e-06, | |
| "loss": 0.0357, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "grad_norm": 2.586491107940674, | |
| "learning_rate": 8.889949748743718e-06, | |
| "loss": 0.0373, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "grad_norm": 2.7574408054351807, | |
| "learning_rate": 8.88743718592965e-06, | |
| "loss": 0.0368, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 2.4347455501556396, | |
| "learning_rate": 8.884924623115579e-06, | |
| "loss": 0.0377, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "grad_norm": 2.858201503753662, | |
| "learning_rate": 8.882412060301508e-06, | |
| "loss": 0.036, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "grad_norm": 2.416962146759033, | |
| "learning_rate": 8.879899497487437e-06, | |
| "loss": 0.0356, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 2.5054562091827393, | |
| "learning_rate": 8.877386934673368e-06, | |
| "loss": 0.0356, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "grad_norm": 2.83569073677063, | |
| "learning_rate": 8.874874371859296e-06, | |
| "loss": 0.0354, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 5.91, | |
| "grad_norm": 3.158905029296875, | |
| "learning_rate": 8.872361809045227e-06, | |
| "loss": 0.0348, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 2.396923780441284, | |
| "learning_rate": 8.869849246231156e-06, | |
| "loss": 0.0339, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "grad_norm": 2.8449628353118896, | |
| "learning_rate": 8.867336683417086e-06, | |
| "loss": 0.037, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "grad_norm": 2.5769689083099365, | |
| "learning_rate": 8.864824120603017e-06, | |
| "loss": 0.0354, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 2.1106691360473633, | |
| "learning_rate": 8.862311557788944e-06, | |
| "loss": 0.0343, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 5.97, | |
| "grad_norm": 2.392437696456909, | |
| "learning_rate": 8.859798994974875e-06, | |
| "loss": 0.0353, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "grad_norm": 2.2878997325897217, | |
| "learning_rate": 8.857286432160805e-06, | |
| "loss": 0.0354, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "grad_norm": 2.6576852798461914, | |
| "learning_rate": 8.854773869346734e-06, | |
| "loss": 0.0363, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "grad_norm": 1.76813542842865, | |
| "learning_rate": 8.852261306532665e-06, | |
| "loss": 0.0284, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "grad_norm": 1.7315902709960938, | |
| "learning_rate": 8.849748743718594e-06, | |
| "loss": 0.0213, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "grad_norm": 2.2705891132354736, | |
| "learning_rate": 8.847236180904524e-06, | |
| "loss": 0.0219, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "grad_norm": 1.689342737197876, | |
| "learning_rate": 8.844723618090453e-06, | |
| "loss": 0.0231, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.05, | |
| "eval_loss": 0.18743818998336792, | |
| "eval_runtime": 649.1371, | |
| "eval_samples_per_second": 2.171, | |
| "eval_steps_per_second": 2.171, | |
| "eval_wer": 23.784157730888968, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "grad_norm": 1.7385754585266113, | |
| "learning_rate": 8.842211055276382e-06, | |
| "loss": 0.0216, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "grad_norm": 1.7763196229934692, | |
| "learning_rate": 8.839698492462312e-06, | |
| "loss": 0.0225, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 1.9993950128555298, | |
| "learning_rate": 8.837185929648243e-06, | |
| "loss": 0.0232, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "grad_norm": 1.7563095092773438, | |
| "learning_rate": 8.83467336683417e-06, | |
| "loss": 0.0224, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 6.11, | |
| "grad_norm": 1.6103582382202148, | |
| "learning_rate": 8.832160804020101e-06, | |
| "loss": 0.0225, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 1.9436490535736084, | |
| "learning_rate": 8.82964824120603e-06, | |
| "loss": 0.021, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "grad_norm": 2.0505597591400146, | |
| "learning_rate": 8.82713567839196e-06, | |
| "loss": 0.0221, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "grad_norm": 1.8634746074676514, | |
| "learning_rate": 8.824623115577891e-06, | |
| "loss": 0.0238, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 2.337662696838379, | |
| "learning_rate": 8.82211055276382e-06, | |
| "loss": 0.0227, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "grad_norm": 2.1642353534698486, | |
| "learning_rate": 8.81959798994975e-06, | |
| "loss": 0.0224, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "grad_norm": 1.816988468170166, | |
| "learning_rate": 8.817085427135679e-06, | |
| "loss": 0.0238, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 1.9968360662460327, | |
| "learning_rate": 8.814572864321608e-06, | |
| "loss": 0.0234, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "grad_norm": 2.105729579925537, | |
| "learning_rate": 8.812060301507538e-06, | |
| "loss": 0.0231, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "grad_norm": 2.2226688861846924, | |
| "learning_rate": 8.809547738693469e-06, | |
| "loss": 0.023, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "grad_norm": 1.8855944871902466, | |
| "learning_rate": 8.807035175879398e-06, | |
| "loss": 0.0215, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "grad_norm": 1.9994747638702393, | |
| "learning_rate": 8.804522613065327e-06, | |
| "loss": 0.0242, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "grad_norm": 1.9194012880325317, | |
| "learning_rate": 8.802010050251257e-06, | |
| "loss": 0.022, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "grad_norm": 2.565969944000244, | |
| "learning_rate": 8.799497487437186e-06, | |
| "loss": 0.0235, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 2.2084851264953613, | |
| "learning_rate": 8.796984924623117e-06, | |
| "loss": 0.0223, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "grad_norm": 2.1252388954162598, | |
| "learning_rate": 8.794472361809046e-06, | |
| "loss": 0.0241, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "grad_norm": 2.299900531768799, | |
| "learning_rate": 8.791959798994976e-06, | |
| "loss": 0.0238, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 2.038466215133667, | |
| "learning_rate": 8.789447236180905e-06, | |
| "loss": 0.0217, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "grad_norm": 2.136720657348633, | |
| "learning_rate": 8.786934673366834e-06, | |
| "loss": 0.0232, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "grad_norm": 2.5748748779296875, | |
| "learning_rate": 8.784422110552765e-06, | |
| "loss": 0.0238, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 3.1436619758605957, | |
| "learning_rate": 8.781909547738695e-06, | |
| "loss": 0.025, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "grad_norm": 1.8555638790130615, | |
| "learning_rate": 8.779396984924624e-06, | |
| "loss": 0.0237, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "grad_norm": 1.8384400606155396, | |
| "learning_rate": 8.776884422110553e-06, | |
| "loss": 0.0239, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 2.0759224891662598, | |
| "learning_rate": 8.774371859296483e-06, | |
| "loss": 0.0215, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "grad_norm": 2.09233021736145, | |
| "learning_rate": 8.771859296482412e-06, | |
| "loss": 0.0246, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "grad_norm": 2.437635660171509, | |
| "learning_rate": 8.769346733668343e-06, | |
| "loss": 0.0229, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 2.3498973846435547, | |
| "learning_rate": 8.766834170854272e-06, | |
| "loss": 0.0237, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "grad_norm": 2.1013572216033936, | |
| "learning_rate": 8.764321608040202e-06, | |
| "loss": 0.0233, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "grad_norm": 2.2095558643341064, | |
| "learning_rate": 8.761809045226131e-06, | |
| "loss": 0.0236, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 6.47, | |
| "grad_norm": 3.4572582244873047, | |
| "learning_rate": 8.75929648241206e-06, | |
| "loss": 0.0219, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "grad_norm": 2.6476633548736572, | |
| "learning_rate": 8.756783919597991e-06, | |
| "loss": 0.0231, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 2.358466863632202, | |
| "learning_rate": 8.75427135678392e-06, | |
| "loss": 0.0227, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "grad_norm": 2.037827253341675, | |
| "learning_rate": 8.75175879396985e-06, | |
| "loss": 0.024, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "grad_norm": 2.3307857513427734, | |
| "learning_rate": 8.74924623115578e-06, | |
| "loss": 0.022, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "grad_norm": 2.1784884929656982, | |
| "learning_rate": 8.746733668341709e-06, | |
| "loss": 0.0218, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "grad_norm": 2.4882514476776123, | |
| "learning_rate": 8.74422110552764e-06, | |
| "loss": 0.023, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "eval_loss": 0.20282986760139465, | |
| "eval_runtime": 651.7625, | |
| "eval_samples_per_second": 2.162, | |
| "eval_steps_per_second": 2.162, | |
| "eval_wer": 24.57973019716361, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "grad_norm": 3.0175468921661377, | |
| "learning_rate": 8.741809045226131e-06, | |
| "loss": 0.0246, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "grad_norm": 2.1591644287109375, | |
| "learning_rate": 8.73929648241206e-06, | |
| "loss": 0.0238, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "grad_norm": 2.0232603549957275, | |
| "learning_rate": 8.736783919597991e-06, | |
| "loss": 0.0231, | |
| "step": 13075 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 1.9856449365615845, | |
| "learning_rate": 8.734271356783919e-06, | |
| "loss": 0.023, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "grad_norm": 1.9815651178359985, | |
| "learning_rate": 8.73175879396985e-06, | |
| "loss": 0.0237, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "grad_norm": 1.602400302886963, | |
| "learning_rate": 8.72924623115578e-06, | |
| "loss": 0.0255, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 2.4619295597076416, | |
| "learning_rate": 8.726733668341709e-06, | |
| "loss": 0.0226, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "grad_norm": 2.190075397491455, | |
| "learning_rate": 8.72422110552764e-06, | |
| "loss": 0.0245, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "grad_norm": 1.8968470096588135, | |
| "learning_rate": 8.721708542713569e-06, | |
| "loss": 0.0221, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 2.0752451419830322, | |
| "learning_rate": 8.719195979899498e-06, | |
| "loss": 0.0227, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "grad_norm": 1.8338621854782104, | |
| "learning_rate": 8.716683417085428e-06, | |
| "loss": 0.0231, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "grad_norm": 2.2742509841918945, | |
| "learning_rate": 8.714170854271357e-06, | |
| "loss": 0.0247, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "grad_norm": 2.536423921585083, | |
| "learning_rate": 8.711658291457286e-06, | |
| "loss": 0.0234, | |
| "step": 13325 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "grad_norm": 1.7209787368774414, | |
| "learning_rate": 8.709145728643217e-06, | |
| "loss": 0.0237, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "grad_norm": 2.257042646408081, | |
| "learning_rate": 8.706633165829147e-06, | |
| "loss": 0.0229, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "grad_norm": 1.924156665802002, | |
| "learning_rate": 8.704120603015076e-06, | |
| "loss": 0.0212, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 2.343059778213501, | |
| "learning_rate": 8.701608040201005e-06, | |
| "loss": 0.0236, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "grad_norm": 2.157313108444214, | |
| "learning_rate": 8.699095477386935e-06, | |
| "loss": 0.0214, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "grad_norm": 2.0653934478759766, | |
| "learning_rate": 8.696582914572866e-06, | |
| "loss": 0.024, | |
| "step": 13475 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 2.4266180992126465, | |
| "learning_rate": 8.694070351758795e-06, | |
| "loss": 0.0239, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 6.81, | |
| "grad_norm": 2.243062973022461, | |
| "learning_rate": 8.691557788944724e-06, | |
| "loss": 0.0224, | |
| "step": 13525 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "grad_norm": 2.309316873550415, | |
| "learning_rate": 8.689045226130654e-06, | |
| "loss": 0.0238, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "grad_norm": 1.8502180576324463, | |
| "learning_rate": 8.686532663316583e-06, | |
| "loss": 0.0214, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "grad_norm": 2.0743794441223145, | |
| "learning_rate": 8.684020100502514e-06, | |
| "loss": 0.0239, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "grad_norm": 2.225356101989746, | |
| "learning_rate": 8.681507537688443e-06, | |
| "loss": 0.0212, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 2.3651390075683594, | |
| "learning_rate": 8.678994974874373e-06, | |
| "loss": 0.0246, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 6.89, | |
| "grad_norm": 2.4614925384521484, | |
| "learning_rate": 8.676482412060302e-06, | |
| "loss": 0.0237, | |
| "step": 13675 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "grad_norm": 2.4582881927490234, | |
| "learning_rate": 8.673969849246231e-06, | |
| "loss": 0.0232, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "grad_norm": 2.3366026878356934, | |
| "learning_rate": 8.67145728643216e-06, | |
| "loss": 0.0236, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "grad_norm": 2.092181444168091, | |
| "learning_rate": 8.668944723618092e-06, | |
| "loss": 0.0237, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "grad_norm": 2.092453956604004, | |
| "learning_rate": 8.666432160804021e-06, | |
| "loss": 0.022, | |
| "step": 13775 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "grad_norm": 2.475292682647705, | |
| "learning_rate": 8.66391959798995e-06, | |
| "loss": 0.0229, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 2.873953104019165, | |
| "learning_rate": 8.661407035175881e-06, | |
| "loss": 0.0231, | |
| "step": 13825 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "grad_norm": 2.619523763656616, | |
| "learning_rate": 8.658894472361809e-06, | |
| "loss": 0.0252, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "grad_norm": 2.5735602378845215, | |
| "learning_rate": 8.65638190954774e-06, | |
| "loss": 0.0231, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 1.6024060249328613, | |
| "learning_rate": 8.65386934673367e-06, | |
| "loss": 0.0213, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "grad_norm": 1.5747658014297485, | |
| "learning_rate": 8.651356783919599e-06, | |
| "loss": 0.0136, | |
| "step": 13925 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "grad_norm": 1.8268576860427856, | |
| "learning_rate": 8.648844221105528e-06, | |
| "loss": 0.0146, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 1.4051076173782349, | |
| "learning_rate": 8.646331658291457e-06, | |
| "loss": 0.0145, | |
| "step": 13975 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "grad_norm": 1.6383821964263916, | |
| "learning_rate": 8.643819095477388e-06, | |
| "loss": 0.0145, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "eval_loss": 0.20823825895786285, | |
| "eval_runtime": 650.647, | |
| "eval_samples_per_second": 2.166, | |
| "eval_steps_per_second": 2.166, | |
| "eval_wer": 24.088550674507093, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "grad_norm": 1.911106824874878, | |
| "learning_rate": 8.641306532663318e-06, | |
| "loss": 0.014, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "grad_norm": 1.6214771270751953, | |
| "learning_rate": 8.638793969849247e-06, | |
| "loss": 0.0141, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 7.09, | |
| "grad_norm": 1.3229256868362427, | |
| "learning_rate": 8.636281407035176e-06, | |
| "loss": 0.0147, | |
| "step": 14075 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "grad_norm": 1.4375226497650146, | |
| "learning_rate": 8.633768844221107e-06, | |
| "loss": 0.0137, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 1.7731012105941772, | |
| "learning_rate": 8.631256281407035e-06, | |
| "loss": 0.0138, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 7.13, | |
| "grad_norm": 1.825411319732666, | |
| "learning_rate": 8.628743718592966e-06, | |
| "loss": 0.0141, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "grad_norm": 2.0631155967712402, | |
| "learning_rate": 8.626231155778895e-06, | |
| "loss": 0.0128, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "grad_norm": 1.8785593509674072, | |
| "learning_rate": 8.623718592964825e-06, | |
| "loss": 0.0135, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "grad_norm": 1.9748233556747437, | |
| "learning_rate": 8.621206030150756e-06, | |
| "loss": 0.0135, | |
| "step": 14225 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "grad_norm": 1.8081029653549194, | |
| "learning_rate": 8.618693467336683e-06, | |
| "loss": 0.0148, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "grad_norm": 1.8967790603637695, | |
| "learning_rate": 8.616180904522614e-06, | |
| "loss": 0.0152, | |
| "step": 14275 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 1.9241995811462402, | |
| "learning_rate": 8.613668341708544e-06, | |
| "loss": 0.0144, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "grad_norm": 1.5693718194961548, | |
| "learning_rate": 8.611155778894473e-06, | |
| "loss": 0.015, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 7.23, | |
| "grad_norm": 1.7336968183517456, | |
| "learning_rate": 8.608643216080402e-06, | |
| "loss": 0.0146, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "grad_norm": 1.6103991270065308, | |
| "learning_rate": 8.606130653266333e-06, | |
| "loss": 0.0143, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "grad_norm": 2.3941450119018555, | |
| "learning_rate": 8.60361809045226e-06, | |
| "loss": 0.0159, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "grad_norm": 1.8727612495422363, | |
| "learning_rate": 8.601105527638192e-06, | |
| "loss": 0.0136, | |
| "step": 14425 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 1.4570417404174805, | |
| "learning_rate": 8.598592964824121e-06, | |
| "loss": 0.0149, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "grad_norm": 2.069019317626953, | |
| "learning_rate": 8.59608040201005e-06, | |
| "loss": 0.0139, | |
| "step": 14475 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "grad_norm": 2.3598926067352295, | |
| "learning_rate": 8.593567839195981e-06, | |
| "loss": 0.0148, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "grad_norm": 1.342768907546997, | |
| "learning_rate": 8.591055276381909e-06, | |
| "loss": 0.0139, | |
| "step": 14525 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "grad_norm": 1.2774055004119873, | |
| "learning_rate": 8.58854271356784e-06, | |
| "loss": 0.0153, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "grad_norm": 1.7923460006713867, | |
| "learning_rate": 8.58603015075377e-06, | |
| "loss": 0.0137, | |
| "step": 14575 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 1.9054781198501587, | |
| "learning_rate": 8.583618090452261e-06, | |
| "loss": 0.015, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "grad_norm": 1.9677022695541382, | |
| "learning_rate": 8.581105527638192e-06, | |
| "loss": 0.0151, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "grad_norm": 1.8171250820159912, | |
| "learning_rate": 8.578592964824121e-06, | |
| "loss": 0.0145, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "grad_norm": 1.4638991355895996, | |
| "learning_rate": 8.57608040201005e-06, | |
| "loss": 0.0138, | |
| "step": 14675 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "grad_norm": 2.3554527759552, | |
| "learning_rate": 8.573567839195982e-06, | |
| "loss": 0.0148, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "grad_norm": 1.273007869720459, | |
| "learning_rate": 8.57105527638191e-06, | |
| "loss": 0.0145, | |
| "step": 14725 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "grad_norm": 2.289254665374756, | |
| "learning_rate": 8.56854271356784e-06, | |
| "loss": 0.015, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 1.6287271976470947, | |
| "learning_rate": 8.56603015075377e-06, | |
| "loss": 0.0153, | |
| "step": 14775 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "grad_norm": 2.6135382652282715, | |
| "learning_rate": 8.563517587939699e-06, | |
| "loss": 0.0151, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "grad_norm": 3.0830838680267334, | |
| "learning_rate": 8.56100502512563e-06, | |
| "loss": 0.0161, | |
| "step": 14825 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "grad_norm": 2.3788559436798096, | |
| "learning_rate": 8.558492462311558e-06, | |
| "loss": 0.0149, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "grad_norm": 1.2800523042678833, | |
| "learning_rate": 8.555979899497489e-06, | |
| "loss": 0.0146, | |
| "step": 14875 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "grad_norm": 2.244983434677124, | |
| "learning_rate": 8.553467336683418e-06, | |
| "loss": 0.0151, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 2.2439663410186768, | |
| "learning_rate": 8.550954773869347e-06, | |
| "loss": 0.0154, | |
| "step": 14925 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "grad_norm": 1.619199514389038, | |
| "learning_rate": 8.548442211055277e-06, | |
| "loss": 0.0149, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "grad_norm": 2.012608289718628, | |
| "learning_rate": 8.545929648241208e-06, | |
| "loss": 0.0161, | |
| "step": 14975 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "grad_norm": 1.9494653940200806, | |
| "learning_rate": 8.543417085427135e-06, | |
| "loss": 0.0151, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "eval_loss": 0.21707138419151306, | |
| "eval_runtime": 647.0406, | |
| "eval_samples_per_second": 2.178, | |
| "eval_steps_per_second": 2.178, | |
| "eval_wer": 24.047042545831893, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "grad_norm": 1.4061529636383057, | |
| "learning_rate": 8.540904522613066e-06, | |
| "loss": 0.0143, | |
| "step": 15025 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "grad_norm": 1.8309212923049927, | |
| "learning_rate": 8.538391959798996e-06, | |
| "loss": 0.0149, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "grad_norm": 2.8870351314544678, | |
| "learning_rate": 8.535879396984925e-06, | |
| "loss": 0.0148, | |
| "step": 15075 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "grad_norm": 2.2940802574157715, | |
| "learning_rate": 8.533366834170856e-06, | |
| "loss": 0.015, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "grad_norm": 2.183642625808716, | |
| "learning_rate": 8.530854271356784e-06, | |
| "loss": 0.0144, | |
| "step": 15125 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "grad_norm": 1.9402830600738525, | |
| "learning_rate": 8.528341708542715e-06, | |
| "loss": 0.0147, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 2.082012414932251, | |
| "learning_rate": 8.525829145728644e-06, | |
| "loss": 0.0161, | |
| "step": 15175 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "grad_norm": 1.9440219402313232, | |
| "learning_rate": 8.523316582914573e-06, | |
| "loss": 0.0143, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "grad_norm": 1.9995568990707397, | |
| "learning_rate": 8.520804020100503e-06, | |
| "loss": 0.0157, | |
| "step": 15225 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 2.117058515548706, | |
| "learning_rate": 8.518291457286434e-06, | |
| "loss": 0.0145, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "grad_norm": 1.6028704643249512, | |
| "learning_rate": 8.515778894472363e-06, | |
| "loss": 0.0146, | |
| "step": 15275 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "grad_norm": 2.337704658508301, | |
| "learning_rate": 8.513266331658292e-06, | |
| "loss": 0.0161, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 2.1473734378814697, | |
| "learning_rate": 8.510753768844222e-06, | |
| "loss": 0.015, | |
| "step": 15325 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "grad_norm": 2.084581136703491, | |
| "learning_rate": 8.508241206030151e-06, | |
| "loss": 0.0169, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "grad_norm": 1.7122375965118408, | |
| "learning_rate": 8.505728643216082e-06, | |
| "loss": 0.0158, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 1.8660895824432373, | |
| "learning_rate": 8.50321608040201e-06, | |
| "loss": 0.0159, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 7.77, | |
| "grad_norm": 1.6958028078079224, | |
| "learning_rate": 8.50070351758794e-06, | |
| "loss": 0.0157, | |
| "step": 15425 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "grad_norm": 1.7803666591644287, | |
| "learning_rate": 8.49819095477387e-06, | |
| "loss": 0.016, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "grad_norm": 2.085075855255127, | |
| "learning_rate": 8.4956783919598e-06, | |
| "loss": 0.0153, | |
| "step": 15475 | |
| }, | |
| { | |
| "epoch": 7.81, | |
| "grad_norm": 1.9727402925491333, | |
| "learning_rate": 8.49316582914573e-06, | |
| "loss": 0.0142, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "grad_norm": 2.111431837081909, | |
| "learning_rate": 8.49065326633166e-06, | |
| "loss": 0.0151, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "grad_norm": 1.8781590461730957, | |
| "learning_rate": 8.488140703517589e-06, | |
| "loss": 0.0149, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "grad_norm": 1.771287441253662, | |
| "learning_rate": 8.485628140703518e-06, | |
| "loss": 0.0147, | |
| "step": 15575 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "grad_norm": 2.1958467960357666, | |
| "learning_rate": 8.483115577889447e-06, | |
| "loss": 0.0157, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "grad_norm": 2.4910318851470947, | |
| "learning_rate": 8.480603015075377e-06, | |
| "loss": 0.0154, | |
| "step": 15625 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "grad_norm": 2.2781054973602295, | |
| "learning_rate": 8.478090452261308e-06, | |
| "loss": 0.0155, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "grad_norm": 2.349332332611084, | |
| "learning_rate": 8.475577889447237e-06, | |
| "loss": 0.015, | |
| "step": 15675 | |
| }, | |
| { | |
| "epoch": 7.91, | |
| "grad_norm": 1.707576870918274, | |
| "learning_rate": 8.473065326633166e-06, | |
| "loss": 0.0159, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 2.0872557163238525, | |
| "learning_rate": 8.470552763819096e-06, | |
| "loss": 0.0158, | |
| "step": 15725 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "grad_norm": 2.1867074966430664, | |
| "learning_rate": 8.468040201005025e-06, | |
| "loss": 0.0157, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 7.95, | |
| "grad_norm": 1.594016671180725, | |
| "learning_rate": 8.465527638190956e-06, | |
| "loss": 0.0153, | |
| "step": 15775 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "grad_norm": 2.078763246536255, | |
| "learning_rate": 8.463015075376885e-06, | |
| "loss": 0.0155, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "grad_norm": 1.8778270483016968, | |
| "learning_rate": 8.460502512562815e-06, | |
| "loss": 0.0161, | |
| "step": 15825 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "grad_norm": 1.7115347385406494, | |
| "learning_rate": 8.457989949748744e-06, | |
| "loss": 0.0155, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.6807602643966675, | |
| "learning_rate": 8.455477386934673e-06, | |
| "loss": 0.0144, | |
| "step": 15875 | |
| }, | |
| { | |
| "epoch": 8.01, | |
| "grad_norm": 1.332097053527832, | |
| "learning_rate": 8.452964824120604e-06, | |
| "loss": 0.0097, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "grad_norm": 1.1560015678405762, | |
| "learning_rate": 8.450452261306534e-06, | |
| "loss": 0.0079, | |
| "step": 15925 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 1.238347053527832, | |
| "learning_rate": 8.447939698492463e-06, | |
| "loss": 0.0095, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "grad_norm": 1.5310404300689697, | |
| "learning_rate": 8.445427135678392e-06, | |
| "loss": 0.0092, | |
| "step": 15975 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "grad_norm": 1.309462547302246, | |
| "learning_rate": 8.442914572864322e-06, | |
| "loss": 0.0086, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "eval_loss": 0.22892026603221893, | |
| "eval_runtime": 642.6728, | |
| "eval_samples_per_second": 2.192, | |
| "eval_steps_per_second": 2.192, | |
| "eval_wer": 24.524386025596677, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "grad_norm": 2.2282941341400146, | |
| "learning_rate": 8.440402010050251e-06, | |
| "loss": 0.0084, | |
| "step": 16025 | |
| }, | |
| { | |
| "epoch": 8.09, | |
| "grad_norm": 1.7921063899993896, | |
| "learning_rate": 8.437889447236182e-06, | |
| "loss": 0.0096, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "grad_norm": 1.3115910291671753, | |
| "learning_rate": 8.435376884422111e-06, | |
| "loss": 0.0088, | |
| "step": 16075 | |
| }, | |
| { | |
| "epoch": 8.11, | |
| "grad_norm": 1.0947102308273315, | |
| "learning_rate": 8.43286432160804e-06, | |
| "loss": 0.0087, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "grad_norm": 1.5556052923202515, | |
| "learning_rate": 8.430351758793972e-06, | |
| "loss": 0.0096, | |
| "step": 16125 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "grad_norm": 1.2743710279464722, | |
| "learning_rate": 8.4278391959799e-06, | |
| "loss": 0.0085, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "grad_norm": 1.6002846956253052, | |
| "learning_rate": 8.42532663316583e-06, | |
| "loss": 0.0084, | |
| "step": 16175 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 1.713494896888733, | |
| "learning_rate": 8.42281407035176e-06, | |
| "loss": 0.008, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "grad_norm": 1.761599063873291, | |
| "learning_rate": 8.420301507537689e-06, | |
| "loss": 0.0095, | |
| "step": 16225 | |
| }, | |
| { | |
| "epoch": 8.19, | |
| "grad_norm": 2.072874069213867, | |
| "learning_rate": 8.417788944723618e-06, | |
| "loss": 0.01, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "grad_norm": 1.140694499015808, | |
| "learning_rate": 8.415276381909548e-06, | |
| "loss": 0.0098, | |
| "step": 16275 | |
| }, | |
| { | |
| "epoch": 8.21, | |
| "grad_norm": 1.6233490705490112, | |
| "learning_rate": 8.412763819095479e-06, | |
| "loss": 0.0099, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "grad_norm": 1.6382660865783691, | |
| "learning_rate": 8.410251256281408e-06, | |
| "loss": 0.0093, | |
| "step": 16325 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 1.4330692291259766, | |
| "learning_rate": 8.407738693467337e-06, | |
| "loss": 0.0093, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 8.25, | |
| "grad_norm": 1.7515724897384644, | |
| "learning_rate": 8.405226130653267e-06, | |
| "loss": 0.0087, | |
| "step": 16375 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "grad_norm": 2.1178219318389893, | |
| "learning_rate": 8.402713567839198e-06, | |
| "loss": 0.0097, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "grad_norm": 2.0358147621154785, | |
| "learning_rate": 8.400201005025125e-06, | |
| "loss": 0.0099, | |
| "step": 16425 | |
| }, | |
| { | |
| "epoch": 8.29, | |
| "grad_norm": 1.4251277446746826, | |
| "learning_rate": 8.397688442211056e-06, | |
| "loss": 0.0097, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "grad_norm": 1.8948729038238525, | |
| "learning_rate": 8.395175879396986e-06, | |
| "loss": 0.01, | |
| "step": 16475 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "grad_norm": 2.1116690635681152, | |
| "learning_rate": 8.392663316582915e-06, | |
| "loss": 0.0097, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "grad_norm": 2.2370381355285645, | |
| "learning_rate": 8.390150753768846e-06, | |
| "loss": 0.0096, | |
| "step": 16525 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "grad_norm": 1.6887876987457275, | |
| "learning_rate": 8.387638190954774e-06, | |
| "loss": 0.0097, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 8.35, | |
| "grad_norm": 1.1379011869430542, | |
| "learning_rate": 8.385125628140705e-06, | |
| "loss": 0.0099, | |
| "step": 16575 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "grad_norm": 1.4453860521316528, | |
| "learning_rate": 8.382613065326634e-06, | |
| "loss": 0.0103, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "grad_norm": 1.388378620147705, | |
| "learning_rate": 8.380100502512563e-06, | |
| "loss": 0.0106, | |
| "step": 16625 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "grad_norm": 2.015660285949707, | |
| "learning_rate": 8.377587939698493e-06, | |
| "loss": 0.0098, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 1.5107704401016235, | |
| "learning_rate": 8.375075376884424e-06, | |
| "loss": 0.0101, | |
| "step": 16675 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "grad_norm": 1.5902012586593628, | |
| "learning_rate": 8.372562814070353e-06, | |
| "loss": 0.01, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 8.43, | |
| "grad_norm": 1.8684821128845215, | |
| "learning_rate": 8.370050251256282e-06, | |
| "loss": 0.0102, | |
| "step": 16725 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "grad_norm": 1.8811595439910889, | |
| "learning_rate": 8.367537688442212e-06, | |
| "loss": 0.0097, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "grad_norm": 2.1212289333343506, | |
| "learning_rate": 8.365025125628141e-06, | |
| "loss": 0.0109, | |
| "step": 16775 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "grad_norm": 1.8294142484664917, | |
| "learning_rate": 8.362512562814072e-06, | |
| "loss": 0.0105, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 1.940006971359253, | |
| "learning_rate": 8.36e-06, | |
| "loss": 0.0097, | |
| "step": 16825 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "grad_norm": 1.9175909757614136, | |
| "learning_rate": 8.35748743718593e-06, | |
| "loss": 0.0111, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "grad_norm": 1.57523775100708, | |
| "learning_rate": 8.35497487437186e-06, | |
| "loss": 0.0095, | |
| "step": 16875 | |
| }, | |
| { | |
| "epoch": 8.51, | |
| "grad_norm": 1.825614094734192, | |
| "learning_rate": 8.35246231155779e-06, | |
| "loss": 0.0106, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "grad_norm": 2.1004061698913574, | |
| "learning_rate": 8.34994974874372e-06, | |
| "loss": 0.0114, | |
| "step": 16925 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "grad_norm": 2.540674924850464, | |
| "learning_rate": 8.34743718592965e-06, | |
| "loss": 0.0099, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "grad_norm": 1.962494134902954, | |
| "learning_rate": 8.344924623115579e-06, | |
| "loss": 0.0102, | |
| "step": 16975 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 1.7412070035934448, | |
| "learning_rate": 8.342412060301508e-06, | |
| "loss": 0.0093, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "eval_loss": 0.238552525639534, | |
| "eval_runtime": 647.4251, | |
| "eval_samples_per_second": 2.176, | |
| "eval_steps_per_second": 2.176, | |
| "eval_wer": 24.828778969214802, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "grad_norm": 2.0897905826568604, | |
| "learning_rate": 8.339899497487438e-06, | |
| "loss": 0.0105, | |
| "step": 17025 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "grad_norm": 2.1660778522491455, | |
| "learning_rate": 8.337386934673367e-06, | |
| "loss": 0.0103, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "grad_norm": 1.5732314586639404, | |
| "learning_rate": 8.334874371859298e-06, | |
| "loss": 0.0089, | |
| "step": 17075 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "grad_norm": 1.8174026012420654, | |
| "learning_rate": 8.332361809045226e-06, | |
| "loss": 0.0115, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 8.63, | |
| "grad_norm": 2.1241679191589355, | |
| "learning_rate": 8.329849246231157e-06, | |
| "loss": 0.0098, | |
| "step": 17125 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 2.3191757202148438, | |
| "learning_rate": 8.327336683417086e-06, | |
| "loss": 0.01, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "grad_norm": 1.6752322912216187, | |
| "learning_rate": 8.324824120603015e-06, | |
| "loss": 0.01, | |
| "step": 17175 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "grad_norm": 2.2477939128875732, | |
| "learning_rate": 8.322311557788946e-06, | |
| "loss": 0.0093, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "grad_norm": 1.754935622215271, | |
| "learning_rate": 8.319798994974876e-06, | |
| "loss": 0.0104, | |
| "step": 17225 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "grad_norm": 1.7442086935043335, | |
| "learning_rate": 8.317286432160805e-06, | |
| "loss": 0.0096, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "grad_norm": 1.5157816410064697, | |
| "learning_rate": 8.314773869346734e-06, | |
| "loss": 0.0099, | |
| "step": 17275 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "grad_norm": 1.630161166191101, | |
| "learning_rate": 8.312261306532663e-06, | |
| "loss": 0.0095, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "grad_norm": 1.7793052196502686, | |
| "learning_rate": 8.309748743718595e-06, | |
| "loss": 0.0118, | |
| "step": 17325 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "grad_norm": 2.110165596008301, | |
| "learning_rate": 8.307236180904524e-06, | |
| "loss": 0.0099, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "grad_norm": 2.5670361518859863, | |
| "learning_rate": 8.304723618090453e-06, | |
| "loss": 0.0111, | |
| "step": 17375 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "grad_norm": 1.9518669843673706, | |
| "learning_rate": 8.302211055276382e-06, | |
| "loss": 0.0107, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "grad_norm": 1.6892119646072388, | |
| "learning_rate": 8.299698492462312e-06, | |
| "loss": 0.0107, | |
| "step": 17425 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "grad_norm": 1.523242473602295, | |
| "learning_rate": 8.297185929648241e-06, | |
| "loss": 0.0108, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 1.384037733078003, | |
| "learning_rate": 8.294673366834172e-06, | |
| "loss": 0.0109, | |
| "step": 17475 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "grad_norm": 1.202498435974121, | |
| "learning_rate": 8.292160804020101e-06, | |
| "loss": 0.0107, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "grad_norm": 1.504539966583252, | |
| "learning_rate": 8.28964824120603e-06, | |
| "loss": 0.0105, | |
| "step": 17525 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "grad_norm": 1.6238044500350952, | |
| "learning_rate": 8.287135678391962e-06, | |
| "loss": 0.0102, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "grad_norm": 2.15169358253479, | |
| "learning_rate": 8.28462311557789e-06, | |
| "loss": 0.0096, | |
| "step": 17575 | |
| }, | |
| { | |
| "epoch": 8.87, | |
| "grad_norm": 1.6884123086929321, | |
| "learning_rate": 8.28211055276382e-06, | |
| "loss": 0.011, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 1.7390291690826416, | |
| "learning_rate": 8.27959798994975e-06, | |
| "loss": 0.0097, | |
| "step": 17625 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "grad_norm": 2.6695148944854736, | |
| "learning_rate": 8.277085427135679e-06, | |
| "loss": 0.0103, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "grad_norm": 1.2569199800491333, | |
| "learning_rate": 8.274572864321608e-06, | |
| "loss": 0.0101, | |
| "step": 17675 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 1.746862769126892, | |
| "learning_rate": 8.272060301507538e-06, | |
| "loss": 0.0114, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "grad_norm": 1.8386530876159668, | |
| "learning_rate": 8.269547738693467e-06, | |
| "loss": 0.0097, | |
| "step": 17725 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "grad_norm": 1.7652249336242676, | |
| "learning_rate": 8.267035175879398e-06, | |
| "loss": 0.0094, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "grad_norm": 1.5770463943481445, | |
| "learning_rate": 8.264522613065327e-06, | |
| "loss": 0.0106, | |
| "step": 17775 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "grad_norm": 2.018402338027954, | |
| "learning_rate": 8.262010050251257e-06, | |
| "loss": 0.0104, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "grad_norm": 1.1194132566452026, | |
| "learning_rate": 8.259497487437188e-06, | |
| "loss": 0.0119, | |
| "step": 17825 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "grad_norm": 1.8884706497192383, | |
| "learning_rate": 8.256984924623115e-06, | |
| "loss": 0.0115, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "grad_norm": 1.9271539449691772, | |
| "learning_rate": 8.254472361809046e-06, | |
| "loss": 0.0096, | |
| "step": 17875 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "grad_norm": 1.3741806745529175, | |
| "learning_rate": 8.251959798994976e-06, | |
| "loss": 0.0058, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "grad_norm": 1.7070800065994263, | |
| "learning_rate": 8.249447236180905e-06, | |
| "loss": 0.0056, | |
| "step": 17925 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 1.0155062675476074, | |
| "learning_rate": 8.246934673366836e-06, | |
| "loss": 0.0057, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 9.06, | |
| "grad_norm": 1.206286907196045, | |
| "learning_rate": 8.244422110552764e-06, | |
| "loss": 0.0059, | |
| "step": 17975 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "grad_norm": 1.463138461112976, | |
| "learning_rate": 8.241909547738695e-06, | |
| "loss": 0.0064, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "eval_loss": 0.2465205192565918, | |
| "eval_runtime": 640.5205, | |
| "eval_samples_per_second": 2.2, | |
| "eval_steps_per_second": 2.2, | |
| "eval_wer": 23.770321687997235, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "grad_norm": 1.703255295753479, | |
| "learning_rate": 8.239396984924624e-06, | |
| "loss": 0.0073, | |
| "step": 18025 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "grad_norm": 2.1183061599731445, | |
| "learning_rate": 8.236884422110553e-06, | |
| "loss": 0.0068, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "grad_norm": 1.6237412691116333, | |
| "learning_rate": 8.234371859296483e-06, | |
| "loss": 0.0068, | |
| "step": 18075 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 1.5408103466033936, | |
| "learning_rate": 8.231859296482414e-06, | |
| "loss": 0.0066, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "grad_norm": 1.5907872915267944, | |
| "learning_rate": 8.229346733668341e-06, | |
| "loss": 0.0062, | |
| "step": 18125 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "grad_norm": 1.9507079124450684, | |
| "learning_rate": 8.226834170854272e-06, | |
| "loss": 0.0059, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "grad_norm": 1.63676118850708, | |
| "learning_rate": 8.224321608040202e-06, | |
| "loss": 0.0061, | |
| "step": 18175 | |
| }, | |
| { | |
| "epoch": 9.17, | |
| "grad_norm": 1.1874332427978516, | |
| "learning_rate": 8.221809045226131e-06, | |
| "loss": 0.0068, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "grad_norm": 1.4674066305160522, | |
| "learning_rate": 8.219296482412062e-06, | |
| "loss": 0.0069, | |
| "step": 18225 | |
| }, | |
| { | |
| "epoch": 9.19, | |
| "grad_norm": 1.2431071996688843, | |
| "learning_rate": 8.21678391959799e-06, | |
| "loss": 0.0069, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 9.21, | |
| "grad_norm": 1.8287932872772217, | |
| "learning_rate": 8.21427135678392e-06, | |
| "loss": 0.0072, | |
| "step": 18275 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "grad_norm": 1.375955581665039, | |
| "learning_rate": 8.21175879396985e-06, | |
| "loss": 0.0064, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "grad_norm": 1.492646336555481, | |
| "learning_rate": 8.20924623115578e-06, | |
| "loss": 0.0065, | |
| "step": 18325 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 2.215989828109741, | |
| "learning_rate": 8.206733668341709e-06, | |
| "loss": 0.006, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "grad_norm": 1.6772810220718384, | |
| "learning_rate": 8.20422110552764e-06, | |
| "loss": 0.0069, | |
| "step": 18375 | |
| }, | |
| { | |
| "epoch": 9.27, | |
| "grad_norm": 1.0609341859817505, | |
| "learning_rate": 8.201708542713569e-06, | |
| "loss": 0.0067, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 1.8097535371780396, | |
| "learning_rate": 8.199195979899498e-06, | |
| "loss": 0.0072, | |
| "step": 18425 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "grad_norm": 1.3503798246383667, | |
| "learning_rate": 8.196683417085428e-06, | |
| "loss": 0.0066, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "grad_norm": 1.5325782299041748, | |
| "learning_rate": 8.194170854271357e-06, | |
| "loss": 0.0069, | |
| "step": 18475 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "grad_norm": 1.3074427843093872, | |
| "learning_rate": 8.191658291457288e-06, | |
| "loss": 0.0069, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "grad_norm": 1.6791765689849854, | |
| "learning_rate": 8.189145728643216e-06, | |
| "loss": 0.008, | |
| "step": 18525 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "grad_norm": 1.9651422500610352, | |
| "learning_rate": 8.186633165829147e-06, | |
| "loss": 0.0073, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 1.8639694452285767, | |
| "learning_rate": 8.184120603015076e-06, | |
| "loss": 0.0071, | |
| "step": 18575 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "grad_norm": 1.2917574644088745, | |
| "learning_rate": 8.181608040201005e-06, | |
| "loss": 0.0065, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "grad_norm": 1.3170853853225708, | |
| "learning_rate": 8.179095477386936e-06, | |
| "loss": 0.0063, | |
| "step": 18625 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "grad_norm": 2.0459671020507812, | |
| "learning_rate": 8.176582914572866e-06, | |
| "loss": 0.0064, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "grad_norm": 2.978271245956421, | |
| "learning_rate": 8.174070351758795e-06, | |
| "loss": 0.007, | |
| "step": 18675 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "grad_norm": 1.3300830125808716, | |
| "learning_rate": 8.171557788944724e-06, | |
| "loss": 0.0067, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "grad_norm": 2.000188112258911, | |
| "learning_rate": 8.169045226130654e-06, | |
| "loss": 0.0072, | |
| "step": 18725 | |
| }, | |
| { | |
| "epoch": 9.45, | |
| "grad_norm": 2.4457991123199463, | |
| "learning_rate": 8.166532663316583e-06, | |
| "loss": 0.0078, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "grad_norm": 0.9397627711296082, | |
| "learning_rate": 8.164020100502514e-06, | |
| "loss": 0.0071, | |
| "step": 18775 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "grad_norm": 2.0449130535125732, | |
| "learning_rate": 8.161507537688443e-06, | |
| "loss": 0.0067, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "grad_norm": 1.4966685771942139, | |
| "learning_rate": 8.158994974874373e-06, | |
| "loss": 0.0074, | |
| "step": 18825 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "grad_norm": 1.2508461475372314, | |
| "learning_rate": 8.156482412060302e-06, | |
| "loss": 0.0061, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 9.51, | |
| "grad_norm": 1.4124109745025635, | |
| "learning_rate": 8.154070351758795e-06, | |
| "loss": 0.0071, | |
| "step": 18875 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 1.9999688863754272, | |
| "learning_rate": 8.151557788944724e-06, | |
| "loss": 0.0075, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "grad_norm": 2.0439536571502686, | |
| "learning_rate": 8.149045226130654e-06, | |
| "loss": 0.0067, | |
| "step": 18925 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "grad_norm": 2.8810691833496094, | |
| "learning_rate": 8.146532663316583e-06, | |
| "loss": 0.0069, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "grad_norm": 1.9366052150726318, | |
| "learning_rate": 8.144020100502512e-06, | |
| "loss": 0.0076, | |
| "step": 18975 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "grad_norm": 1.7127236127853394, | |
| "learning_rate": 8.141507537688443e-06, | |
| "loss": 0.0071, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "eval_loss": 0.2544167637825012, | |
| "eval_runtime": 643.4016, | |
| "eval_samples_per_second": 2.19, | |
| "eval_steps_per_second": 2.19, | |
| "eval_wer": 24.081632653061224, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "grad_norm": 2.526472806930542, | |
| "learning_rate": 8.138994974874373e-06, | |
| "loss": 0.0084, | |
| "step": 19025 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 1.3105931282043457, | |
| "learning_rate": 8.136482412060302e-06, | |
| "loss": 0.0071, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "grad_norm": 1.9968011379241943, | |
| "learning_rate": 8.133969849246231e-06, | |
| "loss": 0.0075, | |
| "step": 19075 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "grad_norm": 1.2742115259170532, | |
| "learning_rate": 8.131457286432162e-06, | |
| "loss": 0.0074, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "grad_norm": 1.062193512916565, | |
| "learning_rate": 8.12894472361809e-06, | |
| "loss": 0.0078, | |
| "step": 19125 | |
| }, | |
| { | |
| "epoch": 9.65, | |
| "grad_norm": 1.0094131231307983, | |
| "learning_rate": 8.126432160804021e-06, | |
| "loss": 0.0069, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 9.66, | |
| "grad_norm": 1.675285816192627, | |
| "learning_rate": 8.12391959798995e-06, | |
| "loss": 0.0082, | |
| "step": 19175 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "grad_norm": 1.96036696434021, | |
| "learning_rate": 8.12140703517588e-06, | |
| "loss": 0.0071, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "grad_norm": 1.8763169050216675, | |
| "learning_rate": 8.11889447236181e-06, | |
| "loss": 0.0075, | |
| "step": 19225 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "grad_norm": 2.053239107131958, | |
| "learning_rate": 8.11638190954774e-06, | |
| "loss": 0.008, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "grad_norm": 1.5086833238601685, | |
| "learning_rate": 8.11386934673367e-06, | |
| "loss": 0.0072, | |
| "step": 19275 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "grad_norm": 1.4810270071029663, | |
| "learning_rate": 8.111356783919599e-06, | |
| "loss": 0.0079, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 9.74, | |
| "grad_norm": 1.87246835231781, | |
| "learning_rate": 8.108844221105528e-06, | |
| "loss": 0.0082, | |
| "step": 19325 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "grad_norm": 2.0673673152923584, | |
| "learning_rate": 8.106331658291457e-06, | |
| "loss": 0.0075, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 1.2569661140441895, | |
| "learning_rate": 8.103819095477388e-06, | |
| "loss": 0.0067, | |
| "step": 19375 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "grad_norm": 1.6111642122268677, | |
| "learning_rate": 8.101306532663318e-06, | |
| "loss": 0.0075, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "grad_norm": 2.3493645191192627, | |
| "learning_rate": 8.098793969849247e-06, | |
| "loss": 0.0077, | |
| "step": 19425 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "grad_norm": 1.9058961868286133, | |
| "learning_rate": 8.096281407035176e-06, | |
| "loss": 0.0074, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "grad_norm": 1.802046537399292, | |
| "learning_rate": 8.093768844221106e-06, | |
| "loss": 0.0073, | |
| "step": 19475 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "grad_norm": 1.5245673656463623, | |
| "learning_rate": 8.091256281407037e-06, | |
| "loss": 0.0067, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 1.501242756843567, | |
| "learning_rate": 8.088743718592966e-06, | |
| "loss": 0.0069, | |
| "step": 19525 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "grad_norm": 1.430893898010254, | |
| "learning_rate": 8.086231155778895e-06, | |
| "loss": 0.0083, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "grad_norm": 1.6655915975570679, | |
| "learning_rate": 8.083718592964825e-06, | |
| "loss": 0.0067, | |
| "step": 19575 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "grad_norm": 1.6842190027236938, | |
| "learning_rate": 8.081206030150754e-06, | |
| "loss": 0.0068, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "grad_norm": 2.076958179473877, | |
| "learning_rate": 8.078693467336685e-06, | |
| "loss": 0.0073, | |
| "step": 19625 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "grad_norm": 2.023754119873047, | |
| "learning_rate": 8.076180904522614e-06, | |
| "loss": 0.0081, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "grad_norm": 1.3943392038345337, | |
| "learning_rate": 8.073668341708544e-06, | |
| "loss": 0.0078, | |
| "step": 19675 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "grad_norm": 2.7814877033233643, | |
| "learning_rate": 8.071155778894473e-06, | |
| "loss": 0.0079, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "grad_norm": 1.6121410131454468, | |
| "learning_rate": 8.068643216080402e-06, | |
| "loss": 0.0077, | |
| "step": 19725 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "grad_norm": 1.514812707901001, | |
| "learning_rate": 8.066130653266332e-06, | |
| "loss": 0.0068, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "grad_norm": 1.439288854598999, | |
| "learning_rate": 8.063618090452263e-06, | |
| "loss": 0.0076, | |
| "step": 19775 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "grad_norm": 0.9317819476127625, | |
| "learning_rate": 8.061105527638192e-06, | |
| "loss": 0.0069, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "grad_norm": 1.976050615310669, | |
| "learning_rate": 8.058592964824121e-06, | |
| "loss": 0.0078, | |
| "step": 19825 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 1.0201984643936157, | |
| "learning_rate": 8.05608040201005e-06, | |
| "loss": 0.0072, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 10.01, | |
| "grad_norm": 0.900181233882904, | |
| "learning_rate": 8.05356783919598e-06, | |
| "loss": 0.0047, | |
| "step": 19875 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "grad_norm": 0.8394168019294739, | |
| "learning_rate": 8.051055276381911e-06, | |
| "loss": 0.0049, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "grad_norm": 0.7329122424125671, | |
| "learning_rate": 8.04854271356784e-06, | |
| "loss": 0.0042, | |
| "step": 19925 | |
| }, | |
| { | |
| "epoch": 10.05, | |
| "grad_norm": 1.6395137310028076, | |
| "learning_rate": 8.04603015075377e-06, | |
| "loss": 0.0045, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 10.06, | |
| "grad_norm": 1.4006826877593994, | |
| "learning_rate": 8.043517587939699e-06, | |
| "loss": 0.0045, | |
| "step": 19975 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "grad_norm": 1.6509326696395874, | |
| "learning_rate": 8.041005025125628e-06, | |
| "loss": 0.0045, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "eval_loss": 0.25544053316116333, | |
| "eval_runtime": 780.2624, | |
| "eval_samples_per_second": 1.806, | |
| "eval_steps_per_second": 1.806, | |
| "eval_wer": 23.528190937391905, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "grad_norm": 1.6562331914901733, | |
| "learning_rate": 8.03849246231156e-06, | |
| "loss": 0.0046, | |
| "step": 20025 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "grad_norm": 1.7131171226501465, | |
| "learning_rate": 8.035979899497489e-06, | |
| "loss": 0.0051, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "grad_norm": 1.3539303541183472, | |
| "learning_rate": 8.033467336683418e-06, | |
| "loss": 0.0041, | |
| "step": 20075 | |
| }, | |
| { | |
| "epoch": 10.13, | |
| "grad_norm": 1.5204474925994873, | |
| "learning_rate": 8.030954773869347e-06, | |
| "loss": 0.005, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "grad_norm": 1.5538609027862549, | |
| "learning_rate": 8.028442211055277e-06, | |
| "loss": 0.005, | |
| "step": 20125 | |
| }, | |
| { | |
| "epoch": 10.15, | |
| "grad_norm": 1.281728744506836, | |
| "learning_rate": 8.025929648241206e-06, | |
| "loss": 0.0043, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 10.16, | |
| "grad_norm": 2.3059980869293213, | |
| "learning_rate": 8.023417085427137e-06, | |
| "loss": 0.0046, | |
| "step": 20175 | |
| }, | |
| { | |
| "epoch": 10.18, | |
| "grad_norm": 0.8961646556854248, | |
| "learning_rate": 8.020904522613066e-06, | |
| "loss": 0.0048, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "grad_norm": 1.1319265365600586, | |
| "learning_rate": 8.018391959798996e-06, | |
| "loss": 0.0043, | |
| "step": 20225 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "grad_norm": 1.8678749799728394, | |
| "learning_rate": 8.015879396984927e-06, | |
| "loss": 0.0052, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "grad_norm": 1.574100375175476, | |
| "learning_rate": 8.013366834170854e-06, | |
| "loss": 0.0048, | |
| "step": 20275 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "grad_norm": 1.676328182220459, | |
| "learning_rate": 8.010854271356785e-06, | |
| "loss": 0.0049, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 10.24, | |
| "grad_norm": 1.3840720653533936, | |
| "learning_rate": 8.008341708542714e-06, | |
| "loss": 0.005, | |
| "step": 20325 | |
| }, | |
| { | |
| "epoch": 10.25, | |
| "grad_norm": 1.3118586540222168, | |
| "learning_rate": 8.005829145728644e-06, | |
| "loss": 0.0054, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "grad_norm": 1.8612523078918457, | |
| "learning_rate": 8.003316582914573e-06, | |
| "loss": 0.0049, | |
| "step": 20375 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "grad_norm": 2.0608303546905518, | |
| "learning_rate": 8.000804020100502e-06, | |
| "loss": 0.0053, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 10.29, | |
| "grad_norm": 1.2124433517456055, | |
| "learning_rate": 7.998291457286432e-06, | |
| "loss": 0.0053, | |
| "step": 20425 | |
| }, | |
| { | |
| "epoch": 10.3, | |
| "grad_norm": 1.3948249816894531, | |
| "learning_rate": 7.995778894472363e-06, | |
| "loss": 0.0047, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "grad_norm": 1.4954756498336792, | |
| "learning_rate": 7.993266331658292e-06, | |
| "loss": 0.0051, | |
| "step": 20475 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "grad_norm": 1.2343194484710693, | |
| "learning_rate": 7.990753768844221e-06, | |
| "loss": 0.0056, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 10.34, | |
| "grad_norm": 2.3053460121154785, | |
| "learning_rate": 7.988241206030152e-06, | |
| "loss": 0.0048, | |
| "step": 20525 | |
| }, | |
| { | |
| "epoch": 10.35, | |
| "grad_norm": 1.0803661346435547, | |
| "learning_rate": 7.98572864321608e-06, | |
| "loss": 0.0051, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "grad_norm": 1.7742104530334473, | |
| "learning_rate": 7.983216080402011e-06, | |
| "loss": 0.0055, | |
| "step": 20575 | |
| }, | |
| { | |
| "epoch": 10.38, | |
| "grad_norm": 1.462449073791504, | |
| "learning_rate": 7.98070351758794e-06, | |
| "loss": 0.0049, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 10.39, | |
| "grad_norm": 1.1726417541503906, | |
| "learning_rate": 7.97819095477387e-06, | |
| "loss": 0.0054, | |
| "step": 20625 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "grad_norm": 1.3419547080993652, | |
| "learning_rate": 7.975678391959799e-06, | |
| "loss": 0.0054, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "grad_norm": 1.3675485849380493, | |
| "learning_rate": 7.973165829145728e-06, | |
| "loss": 0.0052, | |
| "step": 20675 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "grad_norm": 2.0673506259918213, | |
| "learning_rate": 7.97065326633166e-06, | |
| "loss": 0.0057, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 10.44, | |
| "grad_norm": 1.158771276473999, | |
| "learning_rate": 7.968140703517589e-06, | |
| "loss": 0.0052, | |
| "step": 20725 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "grad_norm": 0.8775469660758972, | |
| "learning_rate": 7.965628140703518e-06, | |
| "loss": 0.0051, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "grad_norm": 2.4425106048583984, | |
| "learning_rate": 7.963115577889447e-06, | |
| "loss": 0.0056, | |
| "step": 20775 | |
| }, | |
| { | |
| "epoch": 10.48, | |
| "grad_norm": 1.8277249336242676, | |
| "learning_rate": 7.960603015075378e-06, | |
| "loss": 0.0055, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 10.49, | |
| "grad_norm": 1.406618595123291, | |
| "learning_rate": 7.958090452261306e-06, | |
| "loss": 0.0057, | |
| "step": 20825 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "grad_norm": 1.2207611799240112, | |
| "learning_rate": 7.955577889447237e-06, | |
| "loss": 0.0055, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 10.52, | |
| "grad_norm": 1.2478914260864258, | |
| "learning_rate": 7.953065326633166e-06, | |
| "loss": 0.0051, | |
| "step": 20875 | |
| }, | |
| { | |
| "epoch": 10.53, | |
| "grad_norm": 1.7296686172485352, | |
| "learning_rate": 7.950552763819096e-06, | |
| "loss": 0.0055, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "grad_norm": 1.705731987953186, | |
| "learning_rate": 7.948040201005027e-06, | |
| "loss": 0.005, | |
| "step": 20925 | |
| }, | |
| { | |
| "epoch": 10.55, | |
| "grad_norm": 0.8736703991889954, | |
| "learning_rate": 7.945527638190954e-06, | |
| "loss": 0.0057, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 10.57, | |
| "grad_norm": 1.4896725416183472, | |
| "learning_rate": 7.943015075376885e-06, | |
| "loss": 0.0052, | |
| "step": 20975 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "grad_norm": 1.935691237449646, | |
| "learning_rate": 7.940502512562815e-06, | |
| "loss": 0.0055, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 10.58, | |
| "eval_loss": 0.2658803462982178, | |
| "eval_runtime": 641.6657, | |
| "eval_samples_per_second": 2.196, | |
| "eval_steps_per_second": 2.196, | |
| "eval_wer": 24.116222760290558, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "grad_norm": 1.7740029096603394, | |
| "learning_rate": 7.937989949748744e-06, | |
| "loss": 0.0049, | |
| "step": 21025 | |
| }, | |
| { | |
| "epoch": 10.6, | |
| "grad_norm": 1.8998734951019287, | |
| "learning_rate": 7.935477386934673e-06, | |
| "loss": 0.0065, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "grad_norm": 1.3813358545303345, | |
| "learning_rate": 7.932964824120604e-06, | |
| "loss": 0.006, | |
| "step": 21075 | |
| }, | |
| { | |
| "epoch": 10.63, | |
| "grad_norm": 1.8003945350646973, | |
| "learning_rate": 7.930452261306534e-06, | |
| "loss": 0.0055, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 10.64, | |
| "grad_norm": 1.7029547691345215, | |
| "learning_rate": 7.927939698492463e-06, | |
| "loss": 0.0044, | |
| "step": 21125 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "grad_norm": 2.208817720413208, | |
| "learning_rate": 7.925427135678392e-06, | |
| "loss": 0.0049, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "grad_norm": 1.768268346786499, | |
| "learning_rate": 7.922914572864322e-06, | |
| "loss": 0.0057, | |
| "step": 21175 | |
| }, | |
| { | |
| "epoch": 10.68, | |
| "grad_norm": 1.4503201246261597, | |
| "learning_rate": 7.920402010050253e-06, | |
| "loss": 0.0054, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 10.69, | |
| "grad_norm": 1.497975468635559, | |
| "learning_rate": 7.91788944723618e-06, | |
| "loss": 0.0058, | |
| "step": 21225 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "grad_norm": 1.7116060256958008, | |
| "learning_rate": 7.915477386934674e-06, | |
| "loss": 0.0051, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 10.72, | |
| "grad_norm": 2.685547113418579, | |
| "learning_rate": 7.912964824120603e-06, | |
| "loss": 0.0062, | |
| "step": 21275 | |
| }, | |
| { | |
| "epoch": 10.73, | |
| "grad_norm": 1.4006340503692627, | |
| "learning_rate": 7.910452261306534e-06, | |
| "loss": 0.0052, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 10.74, | |
| "grad_norm": 1.3967355489730835, | |
| "learning_rate": 7.907939698492463e-06, | |
| "loss": 0.0053, | |
| "step": 21325 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "grad_norm": 2.3471438884735107, | |
| "learning_rate": 7.905427135678393e-06, | |
| "loss": 0.0064, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "grad_norm": 1.9880576133728027, | |
| "learning_rate": 7.902914572864322e-06, | |
| "loss": 0.0049, | |
| "step": 21375 | |
| }, | |
| { | |
| "epoch": 10.78, | |
| "grad_norm": 1.6665552854537964, | |
| "learning_rate": 7.900402010050253e-06, | |
| "loss": 0.0055, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "grad_norm": 1.3990721702575684, | |
| "learning_rate": 7.89788944723618e-06, | |
| "loss": 0.0061, | |
| "step": 21425 | |
| }, | |
| { | |
| "epoch": 10.81, | |
| "grad_norm": 1.7920498847961426, | |
| "learning_rate": 7.895376884422111e-06, | |
| "loss": 0.0053, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "grad_norm": 2.0328104496002197, | |
| "learning_rate": 7.89286432160804e-06, | |
| "loss": 0.0055, | |
| "step": 21475 | |
| }, | |
| { | |
| "epoch": 10.83, | |
| "grad_norm": 1.8484914302825928, | |
| "learning_rate": 7.89035175879397e-06, | |
| "loss": 0.0057, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "grad_norm": 1.1959257125854492, | |
| "learning_rate": 7.887839195979901e-06, | |
| "loss": 0.0053, | |
| "step": 21525 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "grad_norm": 1.821405291557312, | |
| "learning_rate": 7.885326633165829e-06, | |
| "loss": 0.005, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "grad_norm": 1.9156001806259155, | |
| "learning_rate": 7.88281407035176e-06, | |
| "loss": 0.0053, | |
| "step": 21575 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "grad_norm": 2.608036518096924, | |
| "learning_rate": 7.880301507537689e-06, | |
| "loss": 0.0058, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 10.89, | |
| "grad_norm": 1.9512995481491089, | |
| "learning_rate": 7.877788944723618e-06, | |
| "loss": 0.0055, | |
| "step": 21625 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "grad_norm": 2.440284252166748, | |
| "learning_rate": 7.875276381909548e-06, | |
| "loss": 0.0056, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 10.92, | |
| "grad_norm": 1.5289582014083862, | |
| "learning_rate": 7.872763819095479e-06, | |
| "loss": 0.0063, | |
| "step": 21675 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "grad_norm": 1.6737768650054932, | |
| "learning_rate": 7.870251256281408e-06, | |
| "loss": 0.0051, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 10.94, | |
| "grad_norm": 1.3897589445114136, | |
| "learning_rate": 7.867738693467337e-06, | |
| "loss": 0.0057, | |
| "step": 21725 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "grad_norm": 2.573732852935791, | |
| "learning_rate": 7.865226130653267e-06, | |
| "loss": 0.0063, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "grad_norm": 1.5241488218307495, | |
| "learning_rate": 7.862713567839196e-06, | |
| "loss": 0.006, | |
| "step": 21775 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "grad_norm": 1.3380444049835205, | |
| "learning_rate": 7.860201005025127e-06, | |
| "loss": 0.0049, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "grad_norm": 1.0626407861709595, | |
| "learning_rate": 7.857688442211055e-06, | |
| "loss": 0.0052, | |
| "step": 21825 | |
| }, | |
| { | |
| "epoch": 11.01, | |
| "grad_norm": 1.4017066955566406, | |
| "learning_rate": 7.855175879396986e-06, | |
| "loss": 0.0037, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 11.02, | |
| "grad_norm": 0.9792363047599792, | |
| "learning_rate": 7.852663316582915e-06, | |
| "loss": 0.0035, | |
| "step": 21875 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "grad_norm": 0.9123956561088562, | |
| "learning_rate": 7.850150753768844e-06, | |
| "loss": 0.0037, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "grad_norm": 1.5364477634429932, | |
| "learning_rate": 7.847638190954775e-06, | |
| "loss": 0.0041, | |
| "step": 21925 | |
| }, | |
| { | |
| "epoch": 11.06, | |
| "grad_norm": 0.9540588855743408, | |
| "learning_rate": 7.845125628140705e-06, | |
| "loss": 0.0042, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "grad_norm": 1.256990909576416, | |
| "learning_rate": 7.842613065326634e-06, | |
| "loss": 0.0037, | |
| "step": 21975 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "grad_norm": 1.1927049160003662, | |
| "learning_rate": 7.840100502512563e-06, | |
| "loss": 0.0034, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 11.08, | |
| "eval_loss": 0.27246958017349243, | |
| "eval_runtime": 638.6614, | |
| "eval_samples_per_second": 2.206, | |
| "eval_steps_per_second": 2.206, | |
| "eval_wer": 24.24766516776202, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "grad_norm": 0.5147941708564758, | |
| "learning_rate": 7.837587939698493e-06, | |
| "loss": 0.0033, | |
| "step": 22025 | |
| }, | |
| { | |
| "epoch": 11.11, | |
| "grad_norm": 0.9543977379798889, | |
| "learning_rate": 7.835075376884422e-06, | |
| "loss": 0.0038, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "grad_norm": 2.2430100440979004, | |
| "learning_rate": 7.832562814070353e-06, | |
| "loss": 0.0041, | |
| "step": 22075 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "grad_norm": 2.2202541828155518, | |
| "learning_rate": 7.830050251256282e-06, | |
| "loss": 0.0041, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 11.15, | |
| "grad_norm": 1.715667486190796, | |
| "learning_rate": 7.827537688442212e-06, | |
| "loss": 0.0039, | |
| "step": 22125 | |
| }, | |
| { | |
| "epoch": 11.16, | |
| "grad_norm": 1.9119547605514526, | |
| "learning_rate": 7.825025125628141e-06, | |
| "loss": 0.0041, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 11.17, | |
| "grad_norm": 1.6011847257614136, | |
| "learning_rate": 7.82251256281407e-06, | |
| "loss": 0.0042, | |
| "step": 22175 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "grad_norm": 1.1330533027648926, | |
| "learning_rate": 7.820000000000001e-06, | |
| "loss": 0.0037, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "grad_norm": 1.442896842956543, | |
| "learning_rate": 7.81748743718593e-06, | |
| "loss": 0.004, | |
| "step": 22225 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "grad_norm": 1.1626893281936646, | |
| "learning_rate": 7.81497487437186e-06, | |
| "loss": 0.0041, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "grad_norm": 1.245456576347351, | |
| "learning_rate": 7.81246231155779e-06, | |
| "loss": 0.0039, | |
| "step": 22275 | |
| }, | |
| { | |
| "epoch": 11.23, | |
| "grad_norm": 1.619498372077942, | |
| "learning_rate": 7.809949748743719e-06, | |
| "loss": 0.0037, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "grad_norm": 1.3442145586013794, | |
| "learning_rate": 7.80743718592965e-06, | |
| "loss": 0.004, | |
| "step": 22325 | |
| }, | |
| { | |
| "epoch": 11.26, | |
| "grad_norm": 1.6045056581497192, | |
| "learning_rate": 7.804924623115579e-06, | |
| "loss": 0.0039, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "grad_norm": 1.1371634006500244, | |
| "learning_rate": 7.802412060301508e-06, | |
| "loss": 0.0041, | |
| "step": 22375 | |
| }, | |
| { | |
| "epoch": 11.28, | |
| "grad_norm": 2.101292848587036, | |
| "learning_rate": 7.799899497487438e-06, | |
| "loss": 0.0039, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "grad_norm": 1.0439993143081665, | |
| "learning_rate": 7.797386934673367e-06, | |
| "loss": 0.0041, | |
| "step": 22425 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "grad_norm": 1.2077866792678833, | |
| "learning_rate": 7.794874371859296e-06, | |
| "loss": 0.0042, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "grad_norm": 1.229236364364624, | |
| "learning_rate": 7.792361809045227e-06, | |
| "loss": 0.0041, | |
| "step": 22475 | |
| }, | |
| { | |
| "epoch": 11.34, | |
| "grad_norm": 1.1711903810501099, | |
| "learning_rate": 7.789849246231157e-06, | |
| "loss": 0.0046, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "grad_norm": 1.5802088975906372, | |
| "learning_rate": 7.787336683417086e-06, | |
| "loss": 0.0044, | |
| "step": 22525 | |
| }, | |
| { | |
| "epoch": 11.36, | |
| "grad_norm": 1.4556586742401123, | |
| "learning_rate": 7.784824120603017e-06, | |
| "loss": 0.0042, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 11.37, | |
| "grad_norm": 2.2199254035949707, | |
| "learning_rate": 7.782311557788945e-06, | |
| "loss": 0.004, | |
| "step": 22575 | |
| }, | |
| { | |
| "epoch": 11.39, | |
| "grad_norm": 1.7703883647918701, | |
| "learning_rate": 7.779798994974876e-06, | |
| "loss": 0.0039, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "grad_norm": 3.0892791748046875, | |
| "learning_rate": 7.777286432160805e-06, | |
| "loss": 0.004, | |
| "step": 22625 | |
| }, | |
| { | |
| "epoch": 11.41, | |
| "grad_norm": 2.2085580825805664, | |
| "learning_rate": 7.774773869346734e-06, | |
| "loss": 0.0039, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 11.42, | |
| "grad_norm": 1.9577640295028687, | |
| "learning_rate": 7.772261306532664e-06, | |
| "loss": 0.0043, | |
| "step": 22675 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "grad_norm": 2.051767349243164, | |
| "learning_rate": 7.769748743718593e-06, | |
| "loss": 0.0044, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "grad_norm": 0.8601694703102112, | |
| "learning_rate": 7.767236180904522e-06, | |
| "loss": 0.0041, | |
| "step": 22725 | |
| }, | |
| { | |
| "epoch": 11.46, | |
| "grad_norm": 1.3375483751296997, | |
| "learning_rate": 7.764723618090453e-06, | |
| "loss": 0.0044, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 11.47, | |
| "grad_norm": 1.3985751867294312, | |
| "learning_rate": 7.762211055276383e-06, | |
| "loss": 0.0042, | |
| "step": 22775 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "grad_norm": 0.8720707297325134, | |
| "learning_rate": 7.759698492462312e-06, | |
| "loss": 0.0047, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "grad_norm": 0.9330138564109802, | |
| "learning_rate": 7.757185929648243e-06, | |
| "loss": 0.0039, | |
| "step": 22825 | |
| }, | |
| { | |
| "epoch": 11.51, | |
| "grad_norm": 1.2324626445770264, | |
| "learning_rate": 7.75467336683417e-06, | |
| "loss": 0.0041, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "grad_norm": 1.7098289728164673, | |
| "learning_rate": 7.752160804020102e-06, | |
| "loss": 0.0043, | |
| "step": 22875 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "grad_norm": 1.247007131576538, | |
| "learning_rate": 7.749648241206031e-06, | |
| "loss": 0.0041, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "grad_norm": 1.9044662714004517, | |
| "learning_rate": 7.74713567839196e-06, | |
| "loss": 0.0046, | |
| "step": 22925 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "grad_norm": 2.0409295558929443, | |
| "learning_rate": 7.744623115577891e-06, | |
| "loss": 0.0043, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 11.57, | |
| "grad_norm": 2.2986319065093994, | |
| "learning_rate": 7.742110552763819e-06, | |
| "loss": 0.0045, | |
| "step": 22975 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "grad_norm": 2.2678630352020264, | |
| "learning_rate": 7.73959798994975e-06, | |
| "loss": 0.0042, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "eval_loss": 0.28508278727531433, | |
| "eval_runtime": 644.32, | |
| "eval_samples_per_second": 2.187, | |
| "eval_steps_per_second": 2.187, | |
| "eval_wer": 24.05396056727776, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "grad_norm": 1.907650113105774, | |
| "learning_rate": 7.73708542713568e-06, | |
| "loss": 0.0041, | |
| "step": 23025 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "grad_norm": 0.9967373013496399, | |
| "learning_rate": 7.734572864321609e-06, | |
| "loss": 0.0044, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 11.62, | |
| "grad_norm": 1.4243005514144897, | |
| "learning_rate": 7.732060301507538e-06, | |
| "loss": 0.0041, | |
| "step": 23075 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "grad_norm": 1.480535864830017, | |
| "learning_rate": 7.729547738693469e-06, | |
| "loss": 0.0041, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 11.65, | |
| "grad_norm": 1.5228785276412964, | |
| "learning_rate": 7.727035175879396e-06, | |
| "loss": 0.0048, | |
| "step": 23125 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "grad_norm": 1.2098692655563354, | |
| "learning_rate": 7.724522613065328e-06, | |
| "loss": 0.0041, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "grad_norm": 1.1572978496551514, | |
| "learning_rate": 7.722010050251257e-06, | |
| "loss": 0.0044, | |
| "step": 23175 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "grad_norm": 1.1368253231048584, | |
| "learning_rate": 7.719497487437186e-06, | |
| "loss": 0.0038, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 11.7, | |
| "grad_norm": 1.6968629360198975, | |
| "learning_rate": 7.716984924623117e-06, | |
| "loss": 0.004, | |
| "step": 23225 | |
| }, | |
| { | |
| "epoch": 11.71, | |
| "grad_norm": 1.8138012886047363, | |
| "learning_rate": 7.714472361809045e-06, | |
| "loss": 0.0041, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 11.73, | |
| "grad_norm": 1.3682103157043457, | |
| "learning_rate": 7.711959798994976e-06, | |
| "loss": 0.0048, | |
| "step": 23275 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "grad_norm": 2.3074638843536377, | |
| "learning_rate": 7.709447236180905e-06, | |
| "loss": 0.0043, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 11.75, | |
| "grad_norm": 1.544771432876587, | |
| "learning_rate": 7.706934673366834e-06, | |
| "loss": 0.0048, | |
| "step": 23325 | |
| }, | |
| { | |
| "epoch": 11.76, | |
| "grad_norm": 1.589612603187561, | |
| "learning_rate": 7.704422110552764e-06, | |
| "loss": 0.0043, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "grad_norm": 1.983675241470337, | |
| "learning_rate": 7.701909547738695e-06, | |
| "loss": 0.0042, | |
| "step": 23375 | |
| }, | |
| { | |
| "epoch": 11.79, | |
| "grad_norm": 1.3556252717971802, | |
| "learning_rate": 7.699396984924624e-06, | |
| "loss": 0.0043, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 11.8, | |
| "grad_norm": 1.4705439805984497, | |
| "learning_rate": 7.696884422110553e-06, | |
| "loss": 0.0048, | |
| "step": 23425 | |
| }, | |
| { | |
| "epoch": 11.81, | |
| "grad_norm": 1.4444918632507324, | |
| "learning_rate": 7.694371859296483e-06, | |
| "loss": 0.0047, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "grad_norm": 1.6264967918395996, | |
| "learning_rate": 7.691859296482412e-06, | |
| "loss": 0.0046, | |
| "step": 23475 | |
| }, | |
| { | |
| "epoch": 11.84, | |
| "grad_norm": 1.275970697402954, | |
| "learning_rate": 7.689346733668343e-06, | |
| "loss": 0.0044, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "grad_norm": 1.5057625770568848, | |
| "learning_rate": 7.68683417085427e-06, | |
| "loss": 0.0042, | |
| "step": 23525 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "grad_norm": 1.7427964210510254, | |
| "learning_rate": 7.684321608040202e-06, | |
| "loss": 0.0047, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 11.88, | |
| "grad_norm": NaN, | |
| "learning_rate": 7.681909547738693e-06, | |
| "loss": 0.0047, | |
| "step": 23575 | |
| }, | |
| { | |
| "epoch": 11.89, | |
| "grad_norm": 2.0153872966766357, | |
| "learning_rate": 7.679396984924624e-06, | |
| "loss": 0.0045, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "grad_norm": 1.4703736305236816, | |
| "learning_rate": 7.676884422110554e-06, | |
| "loss": 0.005, | |
| "step": 23625 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "grad_norm": 2.2874040603637695, | |
| "learning_rate": 7.674371859296483e-06, | |
| "loss": 0.0048, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 11.93, | |
| "grad_norm": 2.741128921508789, | |
| "learning_rate": 7.671859296482412e-06, | |
| "loss": 0.0043, | |
| "step": 23675 | |
| }, | |
| { | |
| "epoch": 11.94, | |
| "grad_norm": 1.3430378437042236, | |
| "learning_rate": 7.669346733668343e-06, | |
| "loss": 0.0042, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 11.95, | |
| "grad_norm": 1.0754338502883911, | |
| "learning_rate": 7.666834170854271e-06, | |
| "loss": 0.0049, | |
| "step": 23725 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "grad_norm": 1.6235313415527344, | |
| "learning_rate": 7.664321608040202e-06, | |
| "loss": 0.0046, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "grad_norm": 1.4375765323638916, | |
| "learning_rate": 7.661809045226131e-06, | |
| "loss": 0.0044, | |
| "step": 23775 | |
| }, | |
| { | |
| "epoch": 11.99, | |
| "grad_norm": 1.9673402309417725, | |
| "learning_rate": 7.65929648241206e-06, | |
| "loss": 0.0047, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 2.2287938594818115, | |
| "learning_rate": 7.656783919597992e-06, | |
| "loss": 0.004, | |
| "step": 23825 | |
| }, | |
| { | |
| "epoch": 12.02, | |
| "grad_norm": 1.1054763793945312, | |
| "learning_rate": 7.65427135678392e-06, | |
| "loss": 0.0037, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 12.03, | |
| "grad_norm": 0.7484707236289978, | |
| "learning_rate": 7.65175879396985e-06, | |
| "loss": 0.0033, | |
| "step": 23875 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "grad_norm": 1.4415768384933472, | |
| "learning_rate": 7.64924623115578e-06, | |
| "loss": 0.0028, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "grad_norm": 0.6510108709335327, | |
| "learning_rate": 7.646733668341709e-06, | |
| "loss": 0.0029, | |
| "step": 23925 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "grad_norm": 0.967505156993866, | |
| "learning_rate": 7.644221105527638e-06, | |
| "loss": 0.0028, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "grad_norm": 0.8767016530036926, | |
| "learning_rate": 7.64170854271357e-06, | |
| "loss": 0.0027, | |
| "step": 23975 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "grad_norm": 1.4679666757583618, | |
| "learning_rate": 7.639195979899499e-06, | |
| "loss": 0.0031, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 12.09, | |
| "eval_loss": 0.28861406445503235, | |
| "eval_runtime": 647.0257, | |
| "eval_samples_per_second": 2.178, | |
| "eval_steps_per_second": 2.178, | |
| "eval_wer": 23.825665859564165, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 12.1, | |
| "grad_norm": 3.264605760574341, | |
| "learning_rate": 7.636683417085428e-06, | |
| "loss": 0.0035, | |
| "step": 24025 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "grad_norm": 1.3971178531646729, | |
| "learning_rate": 7.634170854271357e-06, | |
| "loss": 0.0033, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 12.13, | |
| "grad_norm": 1.240206003189087, | |
| "learning_rate": 7.631658291457287e-06, | |
| "loss": 0.003, | |
| "step": 24075 | |
| }, | |
| { | |
| "epoch": 12.14, | |
| "grad_norm": 1.3925122022628784, | |
| "learning_rate": 7.629145728643217e-06, | |
| "loss": 0.003, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "grad_norm": 0.912455677986145, | |
| "learning_rate": 7.626633165829146e-06, | |
| "loss": 0.0029, | |
| "step": 24125 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "grad_norm": 1.508727788925171, | |
| "learning_rate": 7.624120603015076e-06, | |
| "loss": 0.0029, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 12.18, | |
| "grad_norm": 1.45113205909729, | |
| "learning_rate": 7.621608040201006e-06, | |
| "loss": 0.0035, | |
| "step": 24175 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "grad_norm": 1.3699947595596313, | |
| "learning_rate": 7.619095477386935e-06, | |
| "loss": 0.0029, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 12.2, | |
| "grad_norm": 1.319270133972168, | |
| "learning_rate": 7.616582914572865e-06, | |
| "loss": 0.0034, | |
| "step": 24225 | |
| }, | |
| { | |
| "epoch": 12.22, | |
| "grad_norm": 1.116363286972046, | |
| "learning_rate": 7.614070351758794e-06, | |
| "loss": 0.0034, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 12.23, | |
| "grad_norm": 1.1260015964508057, | |
| "learning_rate": 7.6115577889447245e-06, | |
| "loss": 0.0034, | |
| "step": 24275 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "grad_norm": 1.9116740226745605, | |
| "learning_rate": 7.609045226130654e-06, | |
| "loss": 0.004, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 12.25, | |
| "grad_norm": 2.0611562728881836, | |
| "learning_rate": 7.606532663316584e-06, | |
| "loss": 0.003, | |
| "step": 24325 | |
| }, | |
| { | |
| "epoch": 12.27, | |
| "grad_norm": 1.186975359916687, | |
| "learning_rate": 7.6040201005025125e-06, | |
| "loss": 0.0033, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 12.28, | |
| "grad_norm": 1.219438076019287, | |
| "learning_rate": 7.601507537688443e-06, | |
| "loss": 0.0034, | |
| "step": 24375 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "grad_norm": 1.2265121936798096, | |
| "learning_rate": 7.598994974874373e-06, | |
| "loss": 0.0032, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 12.3, | |
| "grad_norm": 1.6346076726913452, | |
| "learning_rate": 7.596482412060302e-06, | |
| "loss": 0.0034, | |
| "step": 24425 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "grad_norm": 0.8696354031562805, | |
| "learning_rate": 7.593969849246232e-06, | |
| "loss": 0.0031, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 12.33, | |
| "grad_norm": 1.4290732145309448, | |
| "learning_rate": 7.591457286432161e-06, | |
| "loss": 0.0037, | |
| "step": 24475 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "grad_norm": 2.6827902793884277, | |
| "learning_rate": 7.588944723618091e-06, | |
| "loss": 0.0038, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 12.36, | |
| "grad_norm": 1.816667079925537, | |
| "learning_rate": 7.58643216080402e-06, | |
| "loss": 0.0036, | |
| "step": 24525 | |
| }, | |
| { | |
| "epoch": 12.37, | |
| "grad_norm": 0.5981873869895935, | |
| "learning_rate": 7.5839195979899505e-06, | |
| "loss": 0.0027, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 12.38, | |
| "grad_norm": 2.2105541229248047, | |
| "learning_rate": 7.58140703517588e-06, | |
| "loss": 0.0035, | |
| "step": 24575 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "grad_norm": 1.0556033849716187, | |
| "learning_rate": 7.57889447236181e-06, | |
| "loss": 0.0032, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "grad_norm": 1.0780327320098877, | |
| "learning_rate": 7.57638190954774e-06, | |
| "loss": 0.0035, | |
| "step": 24625 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "grad_norm": 1.9094908237457275, | |
| "learning_rate": 7.573869346733669e-06, | |
| "loss": 0.0036, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "grad_norm": 1.2956584692001343, | |
| "learning_rate": 7.571356783919599e-06, | |
| "loss": 0.0044, | |
| "step": 24675 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "grad_norm": 1.5782350301742554, | |
| "learning_rate": 7.568844221105528e-06, | |
| "loss": 0.0031, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "grad_norm": 1.1655892133712769, | |
| "learning_rate": 7.566331658291458e-06, | |
| "loss": 0.0033, | |
| "step": 24725 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "grad_norm": 1.4031734466552734, | |
| "learning_rate": 7.563819095477387e-06, | |
| "loss": 0.0034, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 12.48, | |
| "grad_norm": 1.4916791915893555, | |
| "learning_rate": 7.561306532663317e-06, | |
| "loss": 0.0035, | |
| "step": 24775 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "grad_norm": 0.40038520097732544, | |
| "learning_rate": 7.558793969849247e-06, | |
| "loss": 0.0035, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 12.51, | |
| "grad_norm": 1.3992714881896973, | |
| "learning_rate": 7.556281407035176e-06, | |
| "loss": 0.0033, | |
| "step": 24825 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "grad_norm": 0.48557183146476746, | |
| "learning_rate": 7.5537688442211066e-06, | |
| "loss": 0.004, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "grad_norm": 1.111877679824829, | |
| "learning_rate": 7.551256281407036e-06, | |
| "loss": 0.0033, | |
| "step": 24875 | |
| }, | |
| { | |
| "epoch": 12.54, | |
| "grad_norm": 1.1091711521148682, | |
| "learning_rate": 7.548743718592966e-06, | |
| "loss": 0.0034, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 12.56, | |
| "grad_norm": 1.899695634841919, | |
| "learning_rate": 7.5462311557788945e-06, | |
| "loss": 0.0039, | |
| "step": 24925 | |
| }, | |
| { | |
| "epoch": 12.57, | |
| "grad_norm": 1.4109854698181152, | |
| "learning_rate": 7.543718592964825e-06, | |
| "loss": 0.0033, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "grad_norm": 1.7029036283493042, | |
| "learning_rate": 7.541206030150754e-06, | |
| "loss": 0.0032, | |
| "step": 24975 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "grad_norm": 1.6713786125183105, | |
| "learning_rate": 7.538693467336684e-06, | |
| "loss": 0.0035, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 12.59, | |
| "eval_loss": 0.29140138626098633, | |
| "eval_runtime": 645.0337, | |
| "eval_samples_per_second": 2.184, | |
| "eval_steps_per_second": 2.184, | |
| "eval_wer": 24.399861639571082, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "grad_norm": 2.3478965759277344, | |
| "learning_rate": 7.536180904522614e-06, | |
| "loss": 0.0034, | |
| "step": 25025 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "grad_norm": 0.9575018286705017, | |
| "learning_rate": 7.533668341708543e-06, | |
| "loss": 0.0032, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 12.63, | |
| "grad_norm": 0.94124835729599, | |
| "learning_rate": 7.531155778894473e-06, | |
| "loss": 0.0034, | |
| "step": 25075 | |
| }, | |
| { | |
| "epoch": 12.64, | |
| "grad_norm": 1.8110922574996948, | |
| "learning_rate": 7.528643216080402e-06, | |
| "loss": 0.0034, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "grad_norm": 1.0863940715789795, | |
| "learning_rate": 7.5261306532663325e-06, | |
| "loss": 0.0034, | |
| "step": 25125 | |
| }, | |
| { | |
| "epoch": 12.67, | |
| "grad_norm": 1.8619202375411987, | |
| "learning_rate": 7.523618090452262e-06, | |
| "loss": 0.0037, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "grad_norm": 1.9385497570037842, | |
| "learning_rate": 7.521105527638192e-06, | |
| "loss": 0.0034, | |
| "step": 25175 | |
| }, | |
| { | |
| "epoch": 12.7, | |
| "grad_norm": 1.8523273468017578, | |
| "learning_rate": 7.5185929648241205e-06, | |
| "loss": 0.0035, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "grad_norm": 1.9459110498428345, | |
| "learning_rate": 7.516080402010051e-06, | |
| "loss": 0.0034, | |
| "step": 25225 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "grad_norm": 1.6162514686584473, | |
| "learning_rate": 7.513567839195981e-06, | |
| "loss": 0.0041, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 12.73, | |
| "grad_norm": 1.7794193029403687, | |
| "learning_rate": 7.51105527638191e-06, | |
| "loss": 0.0037, | |
| "step": 25275 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "grad_norm": 1.8779551982879639, | |
| "learning_rate": 7.50854271356784e-06, | |
| "loss": 0.0038, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 12.76, | |
| "grad_norm": 1.2835701704025269, | |
| "learning_rate": 7.506030150753769e-06, | |
| "loss": 0.0035, | |
| "step": 25325 | |
| }, | |
| { | |
| "epoch": 12.77, | |
| "grad_norm": 1.1003650426864624, | |
| "learning_rate": 7.503517587939699e-06, | |
| "loss": 0.0037, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "grad_norm": 1.345335602760315, | |
| "learning_rate": 7.501005025125628e-06, | |
| "loss": 0.0036, | |
| "step": 25375 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "grad_norm": 0.9257469177246094, | |
| "learning_rate": 7.4984924623115585e-06, | |
| "loss": 0.0032, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 12.81, | |
| "grad_norm": 1.3247835636138916, | |
| "learning_rate": 7.495979899497488e-06, | |
| "loss": 0.0042, | |
| "step": 25425 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "grad_norm": 1.658199429512024, | |
| "learning_rate": 7.493467336683418e-06, | |
| "loss": 0.0038, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "grad_norm": 1.2152585983276367, | |
| "learning_rate": 7.490954773869348e-06, | |
| "loss": 0.0035, | |
| "step": 25475 | |
| }, | |
| { | |
| "epoch": 12.85, | |
| "grad_norm": 1.6380772590637207, | |
| "learning_rate": 7.488442211055277e-06, | |
| "loss": 0.0041, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 12.86, | |
| "grad_norm": 1.4688934087753296, | |
| "learning_rate": 7.485929648241207e-06, | |
| "loss": 0.0035, | |
| "step": 25525 | |
| }, | |
| { | |
| "epoch": 12.87, | |
| "grad_norm": 2.3192384243011475, | |
| "learning_rate": 7.483417085427136e-06, | |
| "loss": 0.004, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "grad_norm": 1.0542503595352173, | |
| "learning_rate": 7.480904522613066e-06, | |
| "loss": 0.0038, | |
| "step": 25575 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "grad_norm": 1.6845927238464355, | |
| "learning_rate": 7.478391959798995e-06, | |
| "loss": 0.0034, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 12.91, | |
| "grad_norm": 1.3695615530014038, | |
| "learning_rate": 7.475879396984925e-06, | |
| "loss": 0.0034, | |
| "step": 25625 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "grad_norm": 2.099348306655884, | |
| "learning_rate": 7.473366834170855e-06, | |
| "loss": 0.0041, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "grad_norm": 1.4898931980133057, | |
| "learning_rate": 7.470854271356784e-06, | |
| "loss": 0.0037, | |
| "step": 25675 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "grad_norm": 1.583161473274231, | |
| "learning_rate": 7.4683417085427146e-06, | |
| "loss": 0.0037, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "grad_norm": 0.7393112778663635, | |
| "learning_rate": 7.465829145728644e-06, | |
| "loss": 0.0042, | |
| "step": 25725 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "grad_norm": 1.3594324588775635, | |
| "learning_rate": 7.463316582914574e-06, | |
| "loss": 0.0034, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "grad_norm": 1.8096977472305298, | |
| "learning_rate": 7.4608040201005025e-06, | |
| "loss": 0.0041, | |
| "step": 25775 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 1.482646107673645, | |
| "learning_rate": 7.458291457286433e-06, | |
| "loss": 0.0036, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 13.01, | |
| "grad_norm": 0.5999165177345276, | |
| "learning_rate": 7.455778894472362e-06, | |
| "loss": 0.0026, | |
| "step": 25825 | |
| }, | |
| { | |
| "epoch": 13.02, | |
| "grad_norm": 0.5373625159263611, | |
| "learning_rate": 7.453266331658292e-06, | |
| "loss": 0.0026, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "grad_norm": 1.5365161895751953, | |
| "learning_rate": 7.450753768844222e-06, | |
| "loss": 0.0025, | |
| "step": 25875 | |
| }, | |
| { | |
| "epoch": 13.05, | |
| "grad_norm": 0.9741165637969971, | |
| "learning_rate": 7.448241206030151e-06, | |
| "loss": 0.0025, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 13.06, | |
| "grad_norm": 1.167653203010559, | |
| "learning_rate": 7.445728643216081e-06, | |
| "loss": 0.0027, | |
| "step": 25925 | |
| }, | |
| { | |
| "epoch": 13.07, | |
| "grad_norm": 0.48862722516059875, | |
| "learning_rate": 7.44321608040201e-06, | |
| "loss": 0.0024, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "grad_norm": 0.701203465461731, | |
| "learning_rate": 7.4407035175879405e-06, | |
| "loss": 0.0024, | |
| "step": 25975 | |
| }, | |
| { | |
| "epoch": 13.1, | |
| "grad_norm": 0.8814527988433838, | |
| "learning_rate": 7.43819095477387e-06, | |
| "loss": 0.0022, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 13.1, | |
| "eval_loss": 0.29991698265075684, | |
| "eval_runtime": 640.3865, | |
| "eval_samples_per_second": 2.2, | |
| "eval_steps_per_second": 2.2, | |
| "eval_wer": 23.86717398823936, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 13.11, | |
| "grad_norm": 1.1581878662109375, | |
| "learning_rate": 7.4356783919598e-06, | |
| "loss": 0.0021, | |
| "step": 26025 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "grad_norm": 0.3536165654659271, | |
| "learning_rate": 7.4331658291457285e-06, | |
| "loss": 0.0025, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 13.14, | |
| "grad_norm": 2.3617360591888428, | |
| "learning_rate": 7.430653266331659e-06, | |
| "loss": 0.0028, | |
| "step": 26075 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "grad_norm": 1.100469708442688, | |
| "learning_rate": 7.428140703517589e-06, | |
| "loss": 0.0029, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 13.16, | |
| "grad_norm": 1.3083537817001343, | |
| "learning_rate": 7.425628140703518e-06, | |
| "loss": 0.0033, | |
| "step": 26125 | |
| }, | |
| { | |
| "epoch": 13.17, | |
| "grad_norm": 1.1170566082000732, | |
| "learning_rate": 7.423115577889448e-06, | |
| "loss": 0.0028, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 13.19, | |
| "grad_norm": 1.4462560415267944, | |
| "learning_rate": 7.420603015075377e-06, | |
| "loss": 0.0027, | |
| "step": 26175 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "grad_norm": 0.9630836844444275, | |
| "learning_rate": 7.418090452261307e-06, | |
| "loss": 0.0033, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 13.21, | |
| "grad_norm": 0.7968631386756897, | |
| "learning_rate": 7.415577889447236e-06, | |
| "loss": 0.0025, | |
| "step": 26225 | |
| }, | |
| { | |
| "epoch": 13.22, | |
| "grad_norm": 2.100764751434326, | |
| "learning_rate": 7.4130653266331665e-06, | |
| "loss": 0.0026, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 13.24, | |
| "grad_norm": 1.6591538190841675, | |
| "learning_rate": 7.410552763819097e-06, | |
| "loss": 0.0031, | |
| "step": 26275 | |
| }, | |
| { | |
| "epoch": 13.25, | |
| "grad_norm": 0.645767867565155, | |
| "learning_rate": 7.408040201005026e-06, | |
| "loss": 0.003, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "grad_norm": 1.4978662729263306, | |
| "learning_rate": 7.405527638190956e-06, | |
| "loss": 0.0028, | |
| "step": 26325 | |
| }, | |
| { | |
| "epoch": 13.27, | |
| "grad_norm": 1.357334852218628, | |
| "learning_rate": 7.403015075376885e-06, | |
| "loss": 0.0026, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 13.29, | |
| "grad_norm": 1.751214861869812, | |
| "learning_rate": 7.400502512562815e-06, | |
| "loss": 0.0027, | |
| "step": 26375 | |
| }, | |
| { | |
| "epoch": 13.3, | |
| "grad_norm": 0.9337909817695618, | |
| "learning_rate": 7.397989949748744e-06, | |
| "loss": 0.0026, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "grad_norm": 1.3542364835739136, | |
| "learning_rate": 7.395477386934674e-06, | |
| "loss": 0.0031, | |
| "step": 26425 | |
| }, | |
| { | |
| "epoch": 13.32, | |
| "grad_norm": 0.9499005675315857, | |
| "learning_rate": 7.392964824120603e-06, | |
| "loss": 0.0031, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "grad_norm": 1.5429147481918335, | |
| "learning_rate": 7.390452261306533e-06, | |
| "loss": 0.0029, | |
| "step": 26475 | |
| }, | |
| { | |
| "epoch": 13.35, | |
| "grad_norm": 0.8379466533660889, | |
| "learning_rate": 7.387939698492463e-06, | |
| "loss": 0.0024, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "grad_norm": 1.3563404083251953, | |
| "learning_rate": 7.385427135678392e-06, | |
| "loss": 0.0029, | |
| "step": 26525 | |
| }, | |
| { | |
| "epoch": 13.38, | |
| "grad_norm": 2.4341177940368652, | |
| "learning_rate": 7.382914572864323e-06, | |
| "loss": 0.0028, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 13.39, | |
| "grad_norm": 0.8350504636764526, | |
| "learning_rate": 7.380402010050252e-06, | |
| "loss": 0.0027, | |
| "step": 26575 | |
| }, | |
| { | |
| "epoch": 13.4, | |
| "grad_norm": 1.5221184492111206, | |
| "learning_rate": 7.377889447236182e-06, | |
| "loss": 0.0027, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 13.41, | |
| "grad_norm": 2.1058216094970703, | |
| "learning_rate": 7.3753768844221105e-06, | |
| "loss": 0.0027, | |
| "step": 26625 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "grad_norm": 1.1398578882217407, | |
| "learning_rate": 7.372864321608041e-06, | |
| "loss": 0.0029, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 13.44, | |
| "grad_norm": 2.2516255378723145, | |
| "learning_rate": 7.37035175879397e-06, | |
| "loss": 0.0029, | |
| "step": 26675 | |
| }, | |
| { | |
| "epoch": 13.45, | |
| "grad_norm": 1.5439566373825073, | |
| "learning_rate": 7.3678391959799e-06, | |
| "loss": 0.0028, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "grad_norm": 0.6818922758102417, | |
| "learning_rate": 7.36532663316583e-06, | |
| "loss": 0.0033, | |
| "step": 26725 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "grad_norm": 1.0167973041534424, | |
| "learning_rate": 7.362814070351759e-06, | |
| "loss": 0.0027, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 13.49, | |
| "grad_norm": 0.8457038998603821, | |
| "learning_rate": 7.360301507537689e-06, | |
| "loss": 0.0027, | |
| "step": 26775 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "grad_norm": 1.8538858890533447, | |
| "learning_rate": 7.357788944723618e-06, | |
| "loss": 0.0029, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 13.51, | |
| "grad_norm": 1.6280864477157593, | |
| "learning_rate": 7.3552763819095485e-06, | |
| "loss": 0.0029, | |
| "step": 26825 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "grad_norm": 1.5593993663787842, | |
| "learning_rate": 7.352763819095478e-06, | |
| "loss": 0.0031, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 13.54, | |
| "grad_norm": 2.2892441749572754, | |
| "learning_rate": 7.350251256281408e-06, | |
| "loss": 0.0032, | |
| "step": 26875 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "grad_norm": 1.0337854623794556, | |
| "learning_rate": 7.347738693467338e-06, | |
| "loss": 0.0032, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 13.56, | |
| "grad_norm": 1.764012336730957, | |
| "learning_rate": 7.345226130653267e-06, | |
| "loss": 0.003, | |
| "step": 26925 | |
| }, | |
| { | |
| "epoch": 13.58, | |
| "grad_norm": 1.3078733682632446, | |
| "learning_rate": 7.342713567839197e-06, | |
| "loss": 0.003, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "grad_norm": 1.0039664506912231, | |
| "learning_rate": 7.340201005025126e-06, | |
| "loss": 0.0033, | |
| "step": 26975 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "grad_norm": 1.68669593334198, | |
| "learning_rate": 7.337688442211056e-06, | |
| "loss": 0.0032, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "eval_loss": 0.3001398742198944, | |
| "eval_runtime": 654.1931, | |
| "eval_samples_per_second": 2.154, | |
| "eval_steps_per_second": 2.154, | |
| "eval_wer": 24.538222068488412, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 13.61, | |
| "grad_norm": 1.84382963180542, | |
| "learning_rate": 7.335175879396985e-06, | |
| "loss": 0.0029, | |
| "step": 27025 | |
| }, | |
| { | |
| "epoch": 13.63, | |
| "grad_norm": 0.4542626440525055, | |
| "learning_rate": 7.332663316582915e-06, | |
| "loss": 0.0025, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 13.64, | |
| "grad_norm": 1.7250819206237793, | |
| "learning_rate": 7.330150753768844e-06, | |
| "loss": 0.0029, | |
| "step": 27075 | |
| }, | |
| { | |
| "epoch": 13.65, | |
| "grad_norm": 2.1424949169158936, | |
| "learning_rate": 7.3276381909547745e-06, | |
| "loss": 0.003, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 13.66, | |
| "grad_norm": 1.7538329362869263, | |
| "learning_rate": 7.325125628140705e-06, | |
| "loss": 0.0031, | |
| "step": 27125 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "grad_norm": 1.9615614414215088, | |
| "learning_rate": 7.322613065326634e-06, | |
| "loss": 0.0034, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 13.69, | |
| "grad_norm": 0.4876037836074829, | |
| "learning_rate": 7.320100502512564e-06, | |
| "loss": 0.0026, | |
| "step": 27175 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "grad_norm": 2.511152744293213, | |
| "learning_rate": 7.317587939698493e-06, | |
| "loss": 0.0036, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 13.72, | |
| "grad_norm": 1.6201894283294678, | |
| "learning_rate": 7.315075376884423e-06, | |
| "loss": 0.003, | |
| "step": 27225 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "grad_norm": 2.252923011779785, | |
| "learning_rate": 7.312562814070352e-06, | |
| "loss": 0.0036, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "grad_norm": 1.1069494485855103, | |
| "learning_rate": 7.310050251256282e-06, | |
| "loss": 0.0026, | |
| "step": 27275 | |
| }, | |
| { | |
| "epoch": 13.75, | |
| "grad_norm": 0.6992159485816956, | |
| "learning_rate": 7.307537688442211e-06, | |
| "loss": 0.0031, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 13.77, | |
| "grad_norm": 1.252886414527893, | |
| "learning_rate": 7.305025125628141e-06, | |
| "loss": 0.0027, | |
| "step": 27325 | |
| }, | |
| { | |
| "epoch": 13.78, | |
| "grad_norm": 1.4874393939971924, | |
| "learning_rate": 7.302512562814071e-06, | |
| "loss": 0.0026, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 13.79, | |
| "grad_norm": 0.7340197563171387, | |
| "learning_rate": 7.3e-06, | |
| "loss": 0.0026, | |
| "step": 27375 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "grad_norm": 1.225475788116455, | |
| "learning_rate": 7.297587939698493e-06, | |
| "loss": 0.0032, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "grad_norm": 1.1409143209457397, | |
| "learning_rate": 7.295075376884423e-06, | |
| "loss": 0.0032, | |
| "step": 27425 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "grad_norm": 1.395493507385254, | |
| "learning_rate": 7.292562814070352e-06, | |
| "loss": 0.003, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 13.84, | |
| "grad_norm": 1.5672920942306519, | |
| "learning_rate": 7.2900502512562825e-06, | |
| "loss": 0.0034, | |
| "step": 27475 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "grad_norm": 1.7837570905685425, | |
| "learning_rate": 7.287537688442211e-06, | |
| "loss": 0.0034, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "grad_norm": 2.0357208251953125, | |
| "learning_rate": 7.285025125628141e-06, | |
| "loss": 0.0035, | |
| "step": 27525 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "grad_norm": 1.0619276762008667, | |
| "learning_rate": 7.282512562814071e-06, | |
| "loss": 0.0028, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 13.89, | |
| "grad_norm": 1.7723731994628906, | |
| "learning_rate": 7.280000000000001e-06, | |
| "loss": 0.0035, | |
| "step": 27575 | |
| }, | |
| { | |
| "epoch": 13.9, | |
| "grad_norm": 2.082310438156128, | |
| "learning_rate": 7.277487437185931e-06, | |
| "loss": 0.0036, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 13.92, | |
| "grad_norm": 1.348310112953186, | |
| "learning_rate": 7.274974874371859e-06, | |
| "loss": 0.0032, | |
| "step": 27625 | |
| }, | |
| { | |
| "epoch": 13.93, | |
| "grad_norm": 1.0664507150650024, | |
| "learning_rate": 7.272462311557789e-06, | |
| "loss": 0.003, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "grad_norm": 1.124415397644043, | |
| "learning_rate": 7.269949748743719e-06, | |
| "loss": 0.0032, | |
| "step": 27675 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "grad_norm": 1.4353559017181396, | |
| "learning_rate": 7.267437185929649e-06, | |
| "loss": 0.0033, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 13.97, | |
| "grad_norm": 1.658118724822998, | |
| "learning_rate": 7.264924623115579e-06, | |
| "loss": 0.0038, | |
| "step": 27725 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "grad_norm": 1.0329655408859253, | |
| "learning_rate": 7.262412060301508e-06, | |
| "loss": 0.0032, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "grad_norm": 1.345921516418457, | |
| "learning_rate": 7.259899497487439e-06, | |
| "loss": 0.0025, | |
| "step": 27775 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "grad_norm": 1.0595952272415161, | |
| "learning_rate": 7.257386934673367e-06, | |
| "loss": 0.0027, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "grad_norm": 1.766713261604309, | |
| "learning_rate": 7.254874371859297e-06, | |
| "loss": 0.0025, | |
| "step": 27825 | |
| }, | |
| { | |
| "epoch": 14.03, | |
| "grad_norm": 1.3049800395965576, | |
| "learning_rate": 7.2523618090452265e-06, | |
| "loss": 0.0018, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 14.04, | |
| "grad_norm": 2.0778799057006836, | |
| "learning_rate": 7.249849246231157e-06, | |
| "loss": 0.0025, | |
| "step": 27875 | |
| }, | |
| { | |
| "epoch": 14.06, | |
| "grad_norm": 0.90594881772995, | |
| "learning_rate": 7.247336683417085e-06, | |
| "loss": 0.0022, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 14.07, | |
| "grad_norm": 0.35941508412361145, | |
| "learning_rate": 7.244824120603015e-06, | |
| "loss": 0.0022, | |
| "step": 27925 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "grad_norm": 0.7269408106803894, | |
| "learning_rate": 7.2423115577889455e-06, | |
| "loss": 0.0021, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 14.09, | |
| "grad_norm": 1.4327499866485596, | |
| "learning_rate": 7.239798994974875e-06, | |
| "loss": 0.0025, | |
| "step": 27975 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "grad_norm": 0.6035653948783875, | |
| "learning_rate": 7.237286432160805e-06, | |
| "loss": 0.0024, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "eval_loss": 0.3001614212989807, | |
| "eval_runtime": 784.2944, | |
| "eval_samples_per_second": 1.797, | |
| "eval_steps_per_second": 1.797, | |
| "eval_wer": 23.20304392943618, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 14.12, | |
| "grad_norm": 1.209418535232544, | |
| "learning_rate": 7.234773869346734e-06, | |
| "loss": 0.0022, | |
| "step": 28025 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "grad_norm": 1.2968195676803589, | |
| "learning_rate": 7.2322613065326645e-06, | |
| "loss": 0.0024, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "grad_norm": 1.5654141902923584, | |
| "learning_rate": 7.229748743718593e-06, | |
| "loss": 0.0027, | |
| "step": 28075 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "grad_norm": 1.4532732963562012, | |
| "learning_rate": 7.227236180904523e-06, | |
| "loss": 0.0024, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 14.17, | |
| "grad_norm": 1.7169549465179443, | |
| "learning_rate": 7.2247236180904525e-06, | |
| "loss": 0.0022, | |
| "step": 28125 | |
| }, | |
| { | |
| "epoch": 14.18, | |
| "grad_norm": 1.32295560836792, | |
| "learning_rate": 7.222211055276383e-06, | |
| "loss": 0.0022, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 14.19, | |
| "grad_norm": 0.8905125856399536, | |
| "learning_rate": 7.219698492462313e-06, | |
| "loss": 0.0022, | |
| "step": 28175 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "grad_norm": 0.8778842687606812, | |
| "learning_rate": 7.217185929648241e-06, | |
| "loss": 0.0021, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "grad_norm": 1.4678562879562378, | |
| "learning_rate": 7.2146733668341715e-06, | |
| "loss": 0.0025, | |
| "step": 28225 | |
| }, | |
| { | |
| "epoch": 14.23, | |
| "grad_norm": 0.7229999899864197, | |
| "learning_rate": 7.212160804020101e-06, | |
| "loss": 0.0022, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "grad_norm": 2.1713428497314453, | |
| "learning_rate": 7.209648241206031e-06, | |
| "loss": 0.0023, | |
| "step": 28275 | |
| }, | |
| { | |
| "epoch": 14.26, | |
| "grad_norm": 2.5556602478027344, | |
| "learning_rate": 7.20713567839196e-06, | |
| "loss": 0.0022, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 14.27, | |
| "grad_norm": 0.7470478415489197, | |
| "learning_rate": 7.2046231155778905e-06, | |
| "loss": 0.0022, | |
| "step": 28325 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "grad_norm": 1.2497297525405884, | |
| "learning_rate": 7.20211055276382e-06, | |
| "loss": 0.0022, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 14.29, | |
| "grad_norm": 0.5248861908912659, | |
| "learning_rate": 7.199597989949749e-06, | |
| "loss": 0.0024, | |
| "step": 28375 | |
| }, | |
| { | |
| "epoch": 14.31, | |
| "grad_norm": 1.0900764465332031, | |
| "learning_rate": 7.197085427135679e-06, | |
| "loss": 0.0025, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 14.32, | |
| "grad_norm": 1.5685707330703735, | |
| "learning_rate": 7.194572864321609e-06, | |
| "loss": 0.0028, | |
| "step": 28425 | |
| }, | |
| { | |
| "epoch": 14.33, | |
| "grad_norm": 1.298081874847412, | |
| "learning_rate": 7.192060301507539e-06, | |
| "loss": 0.0026, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "grad_norm": 1.188835620880127, | |
| "learning_rate": 7.189547738693467e-06, | |
| "loss": 0.0024, | |
| "step": 28475 | |
| }, | |
| { | |
| "epoch": 14.36, | |
| "grad_norm": 2.094358205795288, | |
| "learning_rate": 7.187035175879397e-06, | |
| "loss": 0.0023, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "grad_norm": 1.2566583156585693, | |
| "learning_rate": 7.184522613065327e-06, | |
| "loss": 0.0028, | |
| "step": 28525 | |
| }, | |
| { | |
| "epoch": 14.38, | |
| "grad_norm": 1.1933472156524658, | |
| "learning_rate": 7.182010050251257e-06, | |
| "loss": 0.0025, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "grad_norm": 1.451371669769287, | |
| "learning_rate": 7.179497487437187e-06, | |
| "loss": 0.0028, | |
| "step": 28575 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "grad_norm": 1.804445743560791, | |
| "learning_rate": 7.176984924623116e-06, | |
| "loss": 0.0026, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 14.42, | |
| "grad_norm": 0.8600190877914429, | |
| "learning_rate": 7.174472361809047e-06, | |
| "loss": 0.0025, | |
| "step": 28625 | |
| }, | |
| { | |
| "epoch": 14.43, | |
| "grad_norm": 0.6841452121734619, | |
| "learning_rate": 7.171959798994975e-06, | |
| "loss": 0.0027, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 14.45, | |
| "grad_norm": 0.7692683339118958, | |
| "learning_rate": 7.169447236180905e-06, | |
| "loss": 0.0023, | |
| "step": 28675 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "grad_norm": 1.5418920516967773, | |
| "learning_rate": 7.1669346733668345e-06, | |
| "loss": 0.0026, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 14.47, | |
| "grad_norm": 1.3701914548873901, | |
| "learning_rate": 7.164422110552765e-06, | |
| "loss": 0.0023, | |
| "step": 28725 | |
| }, | |
| { | |
| "epoch": 14.48, | |
| "grad_norm": 1.2819687128067017, | |
| "learning_rate": 7.161909547738693e-06, | |
| "loss": 0.0031, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "grad_norm": 0.9504879117012024, | |
| "learning_rate": 7.159396984924623e-06, | |
| "loss": 0.0023, | |
| "step": 28775 | |
| }, | |
| { | |
| "epoch": 14.51, | |
| "grad_norm": 1.3846092224121094, | |
| "learning_rate": 7.1568844221105535e-06, | |
| "loss": 0.0028, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 14.52, | |
| "grad_norm": 0.8752845525741577, | |
| "learning_rate": 7.154371859296483e-06, | |
| "loss": 0.0027, | |
| "step": 28825 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "grad_norm": 2.0625505447387695, | |
| "learning_rate": 7.151859296482413e-06, | |
| "loss": 0.0028, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 14.55, | |
| "grad_norm": 1.281076192855835, | |
| "learning_rate": 7.149346733668342e-06, | |
| "loss": 0.0026, | |
| "step": 28875 | |
| }, | |
| { | |
| "epoch": 14.56, | |
| "grad_norm": 1.1781672239303589, | |
| "learning_rate": 7.1468341708542725e-06, | |
| "loss": 0.0031, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 14.57, | |
| "grad_norm": 1.7508938312530518, | |
| "learning_rate": 7.144321608040201e-06, | |
| "loss": 0.0027, | |
| "step": 28925 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "grad_norm": 1.2148828506469727, | |
| "learning_rate": 7.141809045226131e-06, | |
| "loss": 0.003, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 14.6, | |
| "grad_norm": 1.6405340433120728, | |
| "learning_rate": 7.139296482412061e-06, | |
| "loss": 0.0025, | |
| "step": 28975 | |
| }, | |
| { | |
| "epoch": 14.61, | |
| "grad_norm": 0.7206986546516418, | |
| "learning_rate": 7.136783919597991e-06, | |
| "loss": 0.0025, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 14.61, | |
| "eval_loss": 0.3025396168231964, | |
| "eval_runtime": 646.2169, | |
| "eval_samples_per_second": 2.18, | |
| "eval_steps_per_second": 2.18, | |
| "eval_wer": 23.79107575233483, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "grad_norm": 1.8807600736618042, | |
| "learning_rate": 7.134271356783921e-06, | |
| "loss": 0.0028, | |
| "step": 29025 | |
| }, | |
| { | |
| "epoch": 14.63, | |
| "grad_norm": 0.9913462996482849, | |
| "learning_rate": 7.131758793969849e-06, | |
| "loss": 0.0026, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "grad_norm": 0.6450251340866089, | |
| "learning_rate": 7.1292462311557795e-06, | |
| "loss": 0.002, | |
| "step": 29075 | |
| }, | |
| { | |
| "epoch": 14.66, | |
| "grad_norm": 1.563607096672058, | |
| "learning_rate": 7.126733668341709e-06, | |
| "loss": 0.0024, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "grad_norm": 1.7868529558181763, | |
| "learning_rate": 7.124221105527639e-06, | |
| "loss": 0.0023, | |
| "step": 29125 | |
| }, | |
| { | |
| "epoch": 14.69, | |
| "grad_norm": 1.2877388000488281, | |
| "learning_rate": 7.121708542713568e-06, | |
| "loss": 0.0023, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 14.7, | |
| "grad_norm": 1.0285090208053589, | |
| "learning_rate": 7.1191959798994985e-06, | |
| "loss": 0.0029, | |
| "step": 29175 | |
| }, | |
| { | |
| "epoch": 14.71, | |
| "grad_norm": 0.9814359545707703, | |
| "learning_rate": 7.116683417085428e-06, | |
| "loss": 0.0024, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "grad_norm": 3.390266180038452, | |
| "learning_rate": 7.114170854271357e-06, | |
| "loss": 0.0031, | |
| "step": 29225 | |
| }, | |
| { | |
| "epoch": 14.74, | |
| "grad_norm": 0.9892065525054932, | |
| "learning_rate": 7.111658291457287e-06, | |
| "loss": 0.0026, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 14.75, | |
| "grad_norm": 1.4779770374298096, | |
| "learning_rate": 7.109145728643217e-06, | |
| "loss": 0.0027, | |
| "step": 29275 | |
| }, | |
| { | |
| "epoch": 14.76, | |
| "grad_norm": 1.935717225074768, | |
| "learning_rate": 7.106633165829147e-06, | |
| "loss": 0.0034, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "grad_norm": 0.9933359622955322, | |
| "learning_rate": 7.104120603015075e-06, | |
| "loss": 0.0026, | |
| "step": 29325 | |
| }, | |
| { | |
| "epoch": 14.79, | |
| "grad_norm": 1.2649095058441162, | |
| "learning_rate": 7.1016080402010054e-06, | |
| "loss": 0.0025, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "grad_norm": 0.9197986125946045, | |
| "learning_rate": 7.099095477386935e-06, | |
| "loss": 0.0025, | |
| "step": 29375 | |
| }, | |
| { | |
| "epoch": 14.81, | |
| "grad_norm": 0.7807173132896423, | |
| "learning_rate": 7.096582914572865e-06, | |
| "loss": 0.0029, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 14.82, | |
| "grad_norm": 1.1959360837936401, | |
| "learning_rate": 7.094070351758795e-06, | |
| "loss": 0.0029, | |
| "step": 29425 | |
| }, | |
| { | |
| "epoch": 14.84, | |
| "grad_norm": 1.7630362510681152, | |
| "learning_rate": 7.091557788944724e-06, | |
| "loss": 0.0024, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 14.85, | |
| "grad_norm": 1.1034917831420898, | |
| "learning_rate": 7.089045226130654e-06, | |
| "loss": 0.0027, | |
| "step": 29475 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "grad_norm": 1.185198426246643, | |
| "learning_rate": 7.086532663316583e-06, | |
| "loss": 0.0022, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "grad_norm": 0.9745866656303406, | |
| "learning_rate": 7.084020100502513e-06, | |
| "loss": 0.0029, | |
| "step": 29525 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "grad_norm": 1.3043872117996216, | |
| "learning_rate": 7.0815075376884426e-06, | |
| "loss": 0.0022, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 14.9, | |
| "grad_norm": 1.9529894590377808, | |
| "learning_rate": 7.078994974874373e-06, | |
| "loss": 0.0026, | |
| "step": 29575 | |
| }, | |
| { | |
| "epoch": 14.91, | |
| "grad_norm": 1.3992432355880737, | |
| "learning_rate": 7.076482412060303e-06, | |
| "loss": 0.0028, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 14.92, | |
| "grad_norm": 1.66525399684906, | |
| "learning_rate": 7.073969849246231e-06, | |
| "loss": 0.0026, | |
| "step": 29625 | |
| }, | |
| { | |
| "epoch": 14.94, | |
| "grad_norm": 1.0909807682037354, | |
| "learning_rate": 7.0714572864321615e-06, | |
| "loss": 0.0024, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "grad_norm": 1.1937345266342163, | |
| "learning_rate": 7.068944723618091e-06, | |
| "loss": 0.0028, | |
| "step": 29675 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "grad_norm": 1.9024869203567505, | |
| "learning_rate": 7.066432160804021e-06, | |
| "loss": 0.0029, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "grad_norm": 1.720049500465393, | |
| "learning_rate": 7.06391959798995e-06, | |
| "loss": 0.0026, | |
| "step": 29725 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "grad_norm": 1.847103476524353, | |
| "learning_rate": 7.0614070351758805e-06, | |
| "loss": 0.0028, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 2.2860000133514404, | |
| "learning_rate": 7.058894472361809e-06, | |
| "loss": 0.0031, | |
| "step": 29775 | |
| }, | |
| { | |
| "epoch": 15.01, | |
| "grad_norm": 0.5796921849250793, | |
| "learning_rate": 7.056381909547739e-06, | |
| "loss": 0.0018, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 15.03, | |
| "grad_norm": 0.9784811735153198, | |
| "learning_rate": 7.053869346733669e-06, | |
| "loss": 0.0017, | |
| "step": 29825 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "grad_norm": 0.5142715573310852, | |
| "learning_rate": 7.051356783919599e-06, | |
| "loss": 0.0016, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "grad_norm": 0.4503660202026367, | |
| "learning_rate": 7.048844221105529e-06, | |
| "loss": 0.0016, | |
| "step": 29875 | |
| }, | |
| { | |
| "epoch": 15.06, | |
| "grad_norm": 1.5839601755142212, | |
| "learning_rate": 7.046331658291457e-06, | |
| "loss": 0.0017, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 15.08, | |
| "grad_norm": 1.3505232334136963, | |
| "learning_rate": 7.0438190954773875e-06, | |
| "loss": 0.0017, | |
| "step": 29925 | |
| }, | |
| { | |
| "epoch": 15.09, | |
| "grad_norm": 0.8439552783966064, | |
| "learning_rate": 7.041306532663317e-06, | |
| "loss": 0.0021, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "grad_norm": 0.5884461402893066, | |
| "learning_rate": 7.038793969849247e-06, | |
| "loss": 0.0019, | |
| "step": 29975 | |
| }, | |
| { | |
| "epoch": 15.11, | |
| "grad_norm": 1.5458250045776367, | |
| "learning_rate": 7.036281407035176e-06, | |
| "loss": 0.0016, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 15.11, | |
| "eval_loss": 0.3082928955554962, | |
| "eval_runtime": 644.0315, | |
| "eval_samples_per_second": 2.188, | |
| "eval_steps_per_second": 2.188, | |
| "eval_wer": 23.12694569353165, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 15.13, | |
| "grad_norm": 0.5485078692436218, | |
| "learning_rate": 7.0337688442211065e-06, | |
| "loss": 0.002, | |
| "step": 30025 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "grad_norm": 0.7897219657897949, | |
| "learning_rate": 7.031256281407036e-06, | |
| "loss": 0.002, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 15.15, | |
| "grad_norm": 1.791757345199585, | |
| "learning_rate": 7.028743718592965e-06, | |
| "loss": 0.002, | |
| "step": 30075 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "grad_norm": 1.5494959354400635, | |
| "learning_rate": 7.026231155778895e-06, | |
| "loss": 0.0017, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 15.18, | |
| "grad_norm": 1.4792935848236084, | |
| "learning_rate": 7.023718592964825e-06, | |
| "loss": 0.0021, | |
| "step": 30125 | |
| }, | |
| { | |
| "epoch": 15.19, | |
| "grad_norm": 0.9007993340492249, | |
| "learning_rate": 7.021306532663317e-06, | |
| "loss": 0.0014, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "grad_norm": 1.4334176778793335, | |
| "learning_rate": 7.018793969849247e-06, | |
| "loss": 0.0022, | |
| "step": 30175 | |
| }, | |
| { | |
| "epoch": 15.21, | |
| "grad_norm": 0.3024737238883972, | |
| "learning_rate": 7.016281407035176e-06, | |
| "loss": 0.0018, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 15.23, | |
| "grad_norm": 0.6104531288146973, | |
| "learning_rate": 7.013768844221106e-06, | |
| "loss": 0.0018, | |
| "step": 30225 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "grad_norm": 0.5760412216186523, | |
| "learning_rate": 7.011256281407036e-06, | |
| "loss": 0.0022, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 15.25, | |
| "grad_norm": 1.2721421718597412, | |
| "learning_rate": 7.008743718592965e-06, | |
| "loss": 0.0023, | |
| "step": 30275 | |
| }, | |
| { | |
| "epoch": 15.26, | |
| "grad_norm": 1.3227627277374268, | |
| "learning_rate": 7.0062311557788955e-06, | |
| "loss": 0.002, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 15.28, | |
| "grad_norm": 0.7991645932197571, | |
| "learning_rate": 7.003718592964824e-06, | |
| "loss": 0.0022, | |
| "step": 30325 | |
| }, | |
| { | |
| "epoch": 15.29, | |
| "grad_norm": 0.947595477104187, | |
| "learning_rate": 7.001206030150754e-06, | |
| "loss": 0.0019, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 15.3, | |
| "grad_norm": 0.308912456035614, | |
| "learning_rate": 6.9986934673366834e-06, | |
| "loss": 0.0022, | |
| "step": 30375 | |
| }, | |
| { | |
| "epoch": 15.31, | |
| "grad_norm": 0.7527008056640625, | |
| "learning_rate": 6.996180904522614e-06, | |
| "loss": 0.0021, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "grad_norm": 1.4596527814865112, | |
| "learning_rate": 6.993668341708544e-06, | |
| "loss": 0.0022, | |
| "step": 30425 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "grad_norm": 0.6579309701919556, | |
| "learning_rate": 6.991155778894473e-06, | |
| "loss": 0.0022, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 15.35, | |
| "grad_norm": 1.1227294206619263, | |
| "learning_rate": 6.988643216080403e-06, | |
| "loss": 0.0025, | |
| "step": 30475 | |
| }, | |
| { | |
| "epoch": 15.37, | |
| "grad_norm": 1.243522047996521, | |
| "learning_rate": 6.986130653266332e-06, | |
| "loss": 0.0024, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "grad_norm": 1.4679538011550903, | |
| "learning_rate": 6.983618090452262e-06, | |
| "loss": 0.0025, | |
| "step": 30525 | |
| }, | |
| { | |
| "epoch": 15.39, | |
| "grad_norm": 0.4298401176929474, | |
| "learning_rate": 6.981105527638191e-06, | |
| "loss": 0.0025, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 15.4, | |
| "grad_norm": 1.8829938173294067, | |
| "learning_rate": 6.978592964824121e-06, | |
| "loss": 0.0029, | |
| "step": 30575 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "grad_norm": 0.4301297068595886, | |
| "learning_rate": 6.976080402010051e-06, | |
| "loss": 0.0022, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "grad_norm": 0.4856531322002411, | |
| "learning_rate": 6.97356783919598e-06, | |
| "loss": 0.0022, | |
| "step": 30625 | |
| }, | |
| { | |
| "epoch": 15.44, | |
| "grad_norm": 1.4196797609329224, | |
| "learning_rate": 6.97105527638191e-06, | |
| "loss": 0.0024, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 15.45, | |
| "grad_norm": 0.9456383585929871, | |
| "learning_rate": 6.9685427135678396e-06, | |
| "loss": 0.0026, | |
| "step": 30675 | |
| }, | |
| { | |
| "epoch": 15.47, | |
| "grad_norm": 0.5812883973121643, | |
| "learning_rate": 6.96603015075377e-06, | |
| "loss": 0.0023, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 15.48, | |
| "grad_norm": 1.5798356533050537, | |
| "learning_rate": 6.963517587939699e-06, | |
| "loss": 0.0026, | |
| "step": 30725 | |
| }, | |
| { | |
| "epoch": 15.49, | |
| "grad_norm": 1.7559192180633545, | |
| "learning_rate": 6.961005025125629e-06, | |
| "loss": 0.0023, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 15.5, | |
| "grad_norm": 0.9900276064872742, | |
| "learning_rate": 6.958492462311558e-06, | |
| "loss": 0.0023, | |
| "step": 30775 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "grad_norm": 1.1745249032974243, | |
| "learning_rate": 6.955979899497488e-06, | |
| "loss": 0.0028, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 15.53, | |
| "grad_norm": 1.3300117254257202, | |
| "learning_rate": 6.953467336683417e-06, | |
| "loss": 0.0021, | |
| "step": 30825 | |
| }, | |
| { | |
| "epoch": 15.54, | |
| "grad_norm": 1.642714023590088, | |
| "learning_rate": 6.950954773869347e-06, | |
| "loss": 0.0022, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "grad_norm": 1.159639835357666, | |
| "learning_rate": 6.9484422110552775e-06, | |
| "loss": 0.0024, | |
| "step": 30875 | |
| }, | |
| { | |
| "epoch": 15.57, | |
| "grad_norm": 1.291977882385254, | |
| "learning_rate": 6.945929648241206e-06, | |
| "loss": 0.0021, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 15.58, | |
| "grad_norm": 2.5278995037078857, | |
| "learning_rate": 6.943417085427136e-06, | |
| "loss": 0.0027, | |
| "step": 30925 | |
| }, | |
| { | |
| "epoch": 15.59, | |
| "grad_norm": 1.345812439918518, | |
| "learning_rate": 6.9409045226130655e-06, | |
| "loss": 0.0023, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "grad_norm": 0.608741044998169, | |
| "learning_rate": 6.938391959798996e-06, | |
| "loss": 0.0027, | |
| "step": 30975 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "grad_norm": 1.752109169960022, | |
| "learning_rate": 6.935879396984925e-06, | |
| "loss": 0.0025, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "eval_loss": 0.31733256578445435, | |
| "eval_runtime": 645.4398, | |
| "eval_samples_per_second": 2.183, | |
| "eval_steps_per_second": 2.183, | |
| "eval_wer": 23.590453130404704, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 15.63, | |
| "grad_norm": 0.8238040804862976, | |
| "learning_rate": 6.933366834170855e-06, | |
| "loss": 0.0022, | |
| "step": 31025 | |
| }, | |
| { | |
| "epoch": 15.64, | |
| "grad_norm": 0.9175546169281006, | |
| "learning_rate": 6.930854271356785e-06, | |
| "loss": 0.0023, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "grad_norm": 1.2227699756622314, | |
| "learning_rate": 6.928341708542714e-06, | |
| "loss": 0.0022, | |
| "step": 31075 | |
| }, | |
| { | |
| "epoch": 15.67, | |
| "grad_norm": 1.981120228767395, | |
| "learning_rate": 6.925829145728644e-06, | |
| "loss": 0.0022, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 15.68, | |
| "grad_norm": 1.4415370225906372, | |
| "learning_rate": 6.923316582914573e-06, | |
| "loss": 0.0025, | |
| "step": 31125 | |
| }, | |
| { | |
| "epoch": 15.69, | |
| "grad_norm": 1.6438077688217163, | |
| "learning_rate": 6.9208040201005035e-06, | |
| "loss": 0.0024, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 15.71, | |
| "grad_norm": 2.1774020195007324, | |
| "learning_rate": 6.918291457286432e-06, | |
| "loss": 0.0024, | |
| "step": 31175 | |
| }, | |
| { | |
| "epoch": 15.72, | |
| "grad_norm": 0.5905573964118958, | |
| "learning_rate": 6.915778894472362e-06, | |
| "loss": 0.0025, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 15.73, | |
| "grad_norm": 1.7300103902816772, | |
| "learning_rate": 6.9132663316582915e-06, | |
| "loss": 0.0026, | |
| "step": 31225 | |
| }, | |
| { | |
| "epoch": 15.74, | |
| "grad_norm": 1.1542717218399048, | |
| "learning_rate": 6.910753768844222e-06, | |
| "loss": 0.0023, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "grad_norm": 1.279700756072998, | |
| "learning_rate": 6.908241206030152e-06, | |
| "loss": 0.0024, | |
| "step": 31275 | |
| }, | |
| { | |
| "epoch": 15.77, | |
| "grad_norm": 0.8788714408874512, | |
| "learning_rate": 6.905728643216081e-06, | |
| "loss": 0.0026, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 15.78, | |
| "grad_norm": 0.8979476094245911, | |
| "learning_rate": 6.903216080402011e-06, | |
| "loss": 0.0023, | |
| "step": 31325 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "grad_norm": 0.406665176153183, | |
| "learning_rate": 6.90070351758794e-06, | |
| "loss": 0.0021, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 15.81, | |
| "grad_norm": 0.7604673504829407, | |
| "learning_rate": 6.89819095477387e-06, | |
| "loss": 0.0023, | |
| "step": 31375 | |
| }, | |
| { | |
| "epoch": 15.82, | |
| "grad_norm": 1.2543871402740479, | |
| "learning_rate": 6.895678391959799e-06, | |
| "loss": 0.0026, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 15.83, | |
| "grad_norm": 0.8910918235778809, | |
| "learning_rate": 6.8931658291457294e-06, | |
| "loss": 0.002, | |
| "step": 31425 | |
| }, | |
| { | |
| "epoch": 15.84, | |
| "grad_norm": 0.4898677468299866, | |
| "learning_rate": 6.890653266331658e-06, | |
| "loss": 0.0025, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "grad_norm": 1.3053447008132935, | |
| "learning_rate": 6.888140703517588e-06, | |
| "loss": 0.003, | |
| "step": 31475 | |
| }, | |
| { | |
| "epoch": 15.87, | |
| "grad_norm": 1.2385072708129883, | |
| "learning_rate": 6.885628140703518e-06, | |
| "loss": 0.0023, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 15.88, | |
| "grad_norm": 1.81439208984375, | |
| "learning_rate": 6.8831155778894476e-06, | |
| "loss": 0.0022, | |
| "step": 31525 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "grad_norm": 2.093777894973755, | |
| "learning_rate": 6.880603015075378e-06, | |
| "loss": 0.0025, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 15.91, | |
| "grad_norm": 0.8900623321533203, | |
| "learning_rate": 6.878090452261307e-06, | |
| "loss": 0.0027, | |
| "step": 31575 | |
| }, | |
| { | |
| "epoch": 15.92, | |
| "grad_norm": 1.2843748331069946, | |
| "learning_rate": 6.875577889447237e-06, | |
| "loss": 0.0021, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 15.93, | |
| "grad_norm": 0.9298327565193176, | |
| "learning_rate": 6.873065326633166e-06, | |
| "loss": 0.0019, | |
| "step": 31625 | |
| }, | |
| { | |
| "epoch": 15.94, | |
| "grad_norm": 0.8842711448669434, | |
| "learning_rate": 6.870552763819096e-06, | |
| "loss": 0.0021, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 15.96, | |
| "grad_norm": 0.9891393184661865, | |
| "learning_rate": 6.868040201005026e-06, | |
| "loss": 0.0024, | |
| "step": 31675 | |
| }, | |
| { | |
| "epoch": 15.97, | |
| "grad_norm": 1.2643588781356812, | |
| "learning_rate": 6.865527638190955e-06, | |
| "loss": 0.0023, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "grad_norm": 0.7340218424797058, | |
| "learning_rate": 6.8630150753768855e-06, | |
| "loss": 0.0024, | |
| "step": 31725 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "grad_norm": 1.090314507484436, | |
| "learning_rate": 6.860502512562814e-06, | |
| "loss": 0.0025, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 16.01, | |
| "grad_norm": 2.1509249210357666, | |
| "learning_rate": 6.857989949748744e-06, | |
| "loss": 0.0023, | |
| "step": 31775 | |
| }, | |
| { | |
| "epoch": 16.02, | |
| "grad_norm": 1.119979977607727, | |
| "learning_rate": 6.8554773869346735e-06, | |
| "loss": 0.0015, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 16.03, | |
| "grad_norm": 0.9909570217132568, | |
| "learning_rate": 6.852964824120604e-06, | |
| "loss": 0.002, | |
| "step": 31825 | |
| }, | |
| { | |
| "epoch": 16.05, | |
| "grad_norm": 1.8232905864715576, | |
| "learning_rate": 6.850452261306533e-06, | |
| "loss": 0.0016, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "grad_norm": 1.7875540256500244, | |
| "learning_rate": 6.847939698492463e-06, | |
| "loss": 0.0015, | |
| "step": 31875 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "grad_norm": 1.3473516702651978, | |
| "learning_rate": 6.845427135678393e-06, | |
| "loss": 0.0019, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 16.08, | |
| "grad_norm": 0.521841824054718, | |
| "learning_rate": 6.842914572864322e-06, | |
| "loss": 0.0017, | |
| "step": 31925 | |
| }, | |
| { | |
| "epoch": 16.1, | |
| "grad_norm": 1.0813759565353394, | |
| "learning_rate": 6.840402010050252e-06, | |
| "loss": 0.0016, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 16.11, | |
| "grad_norm": 0.8133834004402161, | |
| "learning_rate": 6.837889447236181e-06, | |
| "loss": 0.0013, | |
| "step": 31975 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "grad_norm": 0.6576656103134155, | |
| "learning_rate": 6.8353768844221115e-06, | |
| "loss": 0.0016, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 16.12, | |
| "eval_loss": 0.3177714943885803, | |
| "eval_runtime": 643.1803, | |
| "eval_samples_per_second": 2.191, | |
| "eval_steps_per_second": 2.191, | |
| "eval_wer": 23.95710826703563, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "grad_norm": 1.695088267326355, | |
| "learning_rate": 6.83286432160804e-06, | |
| "loss": 0.0019, | |
| "step": 32025 | |
| }, | |
| { | |
| "epoch": 16.15, | |
| "grad_norm": 0.49403443932533264, | |
| "learning_rate": 6.83035175879397e-06, | |
| "loss": 0.0017, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 16.16, | |
| "grad_norm": 1.2288966178894043, | |
| "learning_rate": 6.8278391959798995e-06, | |
| "loss": 0.0018, | |
| "step": 32075 | |
| }, | |
| { | |
| "epoch": 16.17, | |
| "grad_norm": 1.711982011795044, | |
| "learning_rate": 6.82532663316583e-06, | |
| "loss": 0.0018, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 16.18, | |
| "grad_norm": 0.9123796820640564, | |
| "learning_rate": 6.82281407035176e-06, | |
| "loss": 0.0022, | |
| "step": 32125 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "grad_norm": 0.8187395930290222, | |
| "learning_rate": 6.820301507537689e-06, | |
| "loss": 0.0018, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 16.21, | |
| "grad_norm": 0.676909327507019, | |
| "learning_rate": 6.817788944723619e-06, | |
| "loss": 0.0019, | |
| "step": 32175 | |
| }, | |
| { | |
| "epoch": 16.22, | |
| "grad_norm": 0.28308579325675964, | |
| "learning_rate": 6.815276381909548e-06, | |
| "loss": 0.0017, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 16.23, | |
| "grad_norm": 0.8627307415008545, | |
| "learning_rate": 6.812763819095478e-06, | |
| "loss": 0.0017, | |
| "step": 32225 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "grad_norm": 0.5035674571990967, | |
| "learning_rate": 6.810251256281407e-06, | |
| "loss": 0.0021, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 16.26, | |
| "grad_norm": 0.611066997051239, | |
| "learning_rate": 6.8077386934673374e-06, | |
| "loss": 0.0018, | |
| "step": 32275 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "grad_norm": 1.7766281366348267, | |
| "learning_rate": 6.805226130653268e-06, | |
| "loss": 0.002, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 16.28, | |
| "grad_norm": 0.9835132956504822, | |
| "learning_rate": 6.802713567839196e-06, | |
| "loss": 0.0018, | |
| "step": 32325 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "grad_norm": 1.363574504852295, | |
| "learning_rate": 6.800201005025126e-06, | |
| "loss": 0.002, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 16.31, | |
| "grad_norm": 1.0854887962341309, | |
| "learning_rate": 6.7976884422110556e-06, | |
| "loss": 0.0019, | |
| "step": 32375 | |
| }, | |
| { | |
| "epoch": 16.32, | |
| "grad_norm": 2.8377525806427, | |
| "learning_rate": 6.795175879396986e-06, | |
| "loss": 0.0023, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 16.34, | |
| "grad_norm": 2.0450568199157715, | |
| "learning_rate": 6.792663316582915e-06, | |
| "loss": 0.0019, | |
| "step": 32425 | |
| }, | |
| { | |
| "epoch": 16.35, | |
| "grad_norm": 1.6299299001693726, | |
| "learning_rate": 6.790150753768845e-06, | |
| "loss": 0.002, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 16.36, | |
| "grad_norm": 1.7007014751434326, | |
| "learning_rate": 6.787638190954774e-06, | |
| "loss": 0.0022, | |
| "step": 32475 | |
| }, | |
| { | |
| "epoch": 16.37, | |
| "grad_norm": 1.5185723304748535, | |
| "learning_rate": 6.785125628140704e-06, | |
| "loss": 0.002, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 16.39, | |
| "grad_norm": 1.15962815284729, | |
| "learning_rate": 6.782613065326634e-06, | |
| "loss": 0.002, | |
| "step": 32525 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "grad_norm": 0.9685630798339844, | |
| "learning_rate": 6.780100502512563e-06, | |
| "loss": 0.0023, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 16.41, | |
| "grad_norm": 0.7952429056167603, | |
| "learning_rate": 6.7775879396984935e-06, | |
| "loss": 0.0024, | |
| "step": 32575 | |
| }, | |
| { | |
| "epoch": 16.42, | |
| "grad_norm": 1.4336612224578857, | |
| "learning_rate": 6.775075376884422e-06, | |
| "loss": 0.0019, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 16.44, | |
| "grad_norm": 1.155975580215454, | |
| "learning_rate": 6.772562814070352e-06, | |
| "loss": 0.0023, | |
| "step": 32625 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "grad_norm": 0.660798966884613, | |
| "learning_rate": 6.7700502512562815e-06, | |
| "loss": 0.0024, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 16.46, | |
| "grad_norm": 1.6323788166046143, | |
| "learning_rate": 6.767537688442212e-06, | |
| "loss": 0.002, | |
| "step": 32675 | |
| }, | |
| { | |
| "epoch": 16.47, | |
| "grad_norm": 0.47817263007164, | |
| "learning_rate": 6.765125628140704e-06, | |
| "loss": 0.0023, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "grad_norm": 1.6108895540237427, | |
| "learning_rate": 6.762613065326634e-06, | |
| "loss": 0.0024, | |
| "step": 32725 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "grad_norm": 1.8570855855941772, | |
| "learning_rate": 6.7601005025125636e-06, | |
| "loss": 0.0025, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 16.51, | |
| "grad_norm": 1.3422707319259644, | |
| "learning_rate": 6.757587939698494e-06, | |
| "loss": 0.002, | |
| "step": 32775 | |
| }, | |
| { | |
| "epoch": 16.52, | |
| "grad_norm": 0.9396295547485352, | |
| "learning_rate": 6.755075376884422e-06, | |
| "loss": 0.0018, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "grad_norm": 0.6690593361854553, | |
| "learning_rate": 6.752562814070352e-06, | |
| "loss": 0.0024, | |
| "step": 32825 | |
| }, | |
| { | |
| "epoch": 16.55, | |
| "grad_norm": 0.6064794659614563, | |
| "learning_rate": 6.750050251256282e-06, | |
| "loss": 0.0019, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 16.56, | |
| "grad_norm": 0.6732133626937866, | |
| "learning_rate": 6.747537688442212e-06, | |
| "loss": 0.0019, | |
| "step": 32875 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "grad_norm": 1.029380202293396, | |
| "learning_rate": 6.74502512562814e-06, | |
| "loss": 0.0023, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 16.59, | |
| "grad_norm": 0.659989058971405, | |
| "learning_rate": 6.7425125628140705e-06, | |
| "loss": 0.0021, | |
| "step": 32925 | |
| }, | |
| { | |
| "epoch": 16.6, | |
| "grad_norm": 0.6724833846092224, | |
| "learning_rate": 6.740000000000001e-06, | |
| "loss": 0.002, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 16.61, | |
| "grad_norm": 1.073951005935669, | |
| "learning_rate": 6.73748743718593e-06, | |
| "loss": 0.0018, | |
| "step": 32975 | |
| }, | |
| { | |
| "epoch": 16.62, | |
| "grad_norm": 0.2644835412502289, | |
| "learning_rate": 6.73497487437186e-06, | |
| "loss": 0.0015, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 16.62, | |
| "eval_loss": 0.31893399357795715, | |
| "eval_runtime": 638.1335, | |
| "eval_samples_per_second": 2.208, | |
| "eval_steps_per_second": 2.208, | |
| "eval_wer": 23.625043237634035, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "grad_norm": 0.4319547712802887, | |
| "learning_rate": 6.7324623115577895e-06, | |
| "loss": 0.0021, | |
| "step": 33025 | |
| }, | |
| { | |
| "epoch": 16.65, | |
| "grad_norm": 0.5117477178573608, | |
| "learning_rate": 6.72994974874372e-06, | |
| "loss": 0.0017, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "grad_norm": 0.599642276763916, | |
| "learning_rate": 6.727437185929648e-06, | |
| "loss": 0.0022, | |
| "step": 33075 | |
| }, | |
| { | |
| "epoch": 16.68, | |
| "grad_norm": 2.386610984802246, | |
| "learning_rate": 6.724924623115578e-06, | |
| "loss": 0.0024, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 16.69, | |
| "grad_norm": 0.7631763219833374, | |
| "learning_rate": 6.7224120603015085e-06, | |
| "loss": 0.0019, | |
| "step": 33125 | |
| }, | |
| { | |
| "epoch": 16.7, | |
| "grad_norm": 1.534925937652588, | |
| "learning_rate": 6.719899497487438e-06, | |
| "loss": 0.0022, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 16.71, | |
| "grad_norm": 1.815709114074707, | |
| "learning_rate": 6.717386934673368e-06, | |
| "loss": 0.0022, | |
| "step": 33175 | |
| }, | |
| { | |
| "epoch": 16.73, | |
| "grad_norm": 0.47216132283210754, | |
| "learning_rate": 6.7148743718592965e-06, | |
| "loss": 0.002, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "grad_norm": 1.2903132438659668, | |
| "learning_rate": 6.712361809045227e-06, | |
| "loss": 0.0024, | |
| "step": 33225 | |
| }, | |
| { | |
| "epoch": 16.75, | |
| "grad_norm": 1.4618940353393555, | |
| "learning_rate": 6.709849246231156e-06, | |
| "loss": 0.0021, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 16.76, | |
| "grad_norm": 2.112274169921875, | |
| "learning_rate": 6.707336683417086e-06, | |
| "loss": 0.0021, | |
| "step": 33275 | |
| }, | |
| { | |
| "epoch": 16.78, | |
| "grad_norm": 1.849009394645691, | |
| "learning_rate": 6.7048241206030155e-06, | |
| "loss": 0.0023, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 16.79, | |
| "grad_norm": 1.0181339979171753, | |
| "learning_rate": 6.702311557788946e-06, | |
| "loss": 0.002, | |
| "step": 33325 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "grad_norm": 1.6199973821640015, | |
| "learning_rate": 6.699798994974876e-06, | |
| "loss": 0.0024, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 16.81, | |
| "grad_norm": 0.8824648261070251, | |
| "learning_rate": 6.697286432160804e-06, | |
| "loss": 0.0021, | |
| "step": 33375 | |
| }, | |
| { | |
| "epoch": 16.83, | |
| "grad_norm": 1.2682048082351685, | |
| "learning_rate": 6.6947738693467344e-06, | |
| "loss": 0.0021, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 16.84, | |
| "grad_norm": 0.9669592976570129, | |
| "learning_rate": 6.692261306532664e-06, | |
| "loss": 0.0019, | |
| "step": 33425 | |
| }, | |
| { | |
| "epoch": 16.85, | |
| "grad_norm": 2.088453769683838, | |
| "learning_rate": 6.689748743718594e-06, | |
| "loss": 0.0018, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "grad_norm": 1.75133216381073, | |
| "learning_rate": 6.687236180904522e-06, | |
| "loss": 0.0023, | |
| "step": 33475 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "grad_norm": 2.224334239959717, | |
| "learning_rate": 6.684723618090453e-06, | |
| "loss": 0.0019, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 16.89, | |
| "grad_norm": 0.6259239315986633, | |
| "learning_rate": 6.682211055276382e-06, | |
| "loss": 0.0026, | |
| "step": 33525 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "grad_norm": 1.3590632677078247, | |
| "learning_rate": 6.679698492462312e-06, | |
| "loss": 0.0022, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 16.91, | |
| "grad_norm": 1.8630064725875854, | |
| "learning_rate": 6.677185929648242e-06, | |
| "loss": 0.0025, | |
| "step": 33575 | |
| }, | |
| { | |
| "epoch": 16.93, | |
| "grad_norm": 1.7753084897994995, | |
| "learning_rate": 6.6746733668341716e-06, | |
| "loss": 0.0022, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 16.94, | |
| "grad_norm": 0.30768975615501404, | |
| "learning_rate": 6.672160804020102e-06, | |
| "loss": 0.0021, | |
| "step": 33625 | |
| }, | |
| { | |
| "epoch": 16.95, | |
| "grad_norm": 1.4861748218536377, | |
| "learning_rate": 6.66964824120603e-06, | |
| "loss": 0.002, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "grad_norm": 1.5577760934829712, | |
| "learning_rate": 6.66713567839196e-06, | |
| "loss": 0.002, | |
| "step": 33675 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "grad_norm": 0.8203927278518677, | |
| "learning_rate": 6.66462311557789e-06, | |
| "loss": 0.0019, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "grad_norm": 1.1603564023971558, | |
| "learning_rate": 6.66211055276382e-06, | |
| "loss": 0.0021, | |
| "step": 33725 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.41753071546554565, | |
| "learning_rate": 6.65959798994975e-06, | |
| "loss": 0.0019, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 17.02, | |
| "grad_norm": 0.49397504329681396, | |
| "learning_rate": 6.6570854271356785e-06, | |
| "loss": 0.0016, | |
| "step": 33775 | |
| }, | |
| { | |
| "epoch": 17.03, | |
| "grad_norm": 1.637376070022583, | |
| "learning_rate": 6.654572864321609e-06, | |
| "loss": 0.0017, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 17.04, | |
| "grad_norm": 0.45649200677871704, | |
| "learning_rate": 6.652060301507538e-06, | |
| "loss": 0.0017, | |
| "step": 33825 | |
| }, | |
| { | |
| "epoch": 17.05, | |
| "grad_norm": 1.1269056797027588, | |
| "learning_rate": 6.649547738693468e-06, | |
| "loss": 0.0021, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 17.07, | |
| "grad_norm": 1.279366374015808, | |
| "learning_rate": 6.6470351758793975e-06, | |
| "loss": 0.0016, | |
| "step": 33875 | |
| }, | |
| { | |
| "epoch": 17.08, | |
| "grad_norm": 1.3553489446640015, | |
| "learning_rate": 6.644522613065328e-06, | |
| "loss": 0.0014, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 17.09, | |
| "grad_norm": 0.3694764971733093, | |
| "learning_rate": 6.642010050251256e-06, | |
| "loss": 0.0015, | |
| "step": 33925 | |
| }, | |
| { | |
| "epoch": 17.1, | |
| "grad_norm": 0.7455251812934875, | |
| "learning_rate": 6.639497487437186e-06, | |
| "loss": 0.0015, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 17.12, | |
| "grad_norm": 1.4663114547729492, | |
| "learning_rate": 6.6369849246231165e-06, | |
| "loss": 0.0014, | |
| "step": 33975 | |
| }, | |
| { | |
| "epoch": 17.13, | |
| "grad_norm": 0.41065290570259094, | |
| "learning_rate": 6.634472361809046e-06, | |
| "loss": 0.0012, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 17.13, | |
| "eval_loss": 0.31717780232429504, | |
| "eval_runtime": 640.7627, | |
| "eval_samples_per_second": 2.199, | |
| "eval_steps_per_second": 2.199, | |
| "eval_wer": 22.94707713593912, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "grad_norm": 0.921389102935791, | |
| "learning_rate": 6.631959798994976e-06, | |
| "loss": 0.0013, | |
| "step": 34025 | |
| }, | |
| { | |
| "epoch": 17.15, | |
| "grad_norm": 1.2974750995635986, | |
| "learning_rate": 6.6294472361809045e-06, | |
| "loss": 0.0015, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "grad_norm": 0.946368932723999, | |
| "learning_rate": 6.626934673366835e-06, | |
| "loss": 0.0015, | |
| "step": 34075 | |
| }, | |
| { | |
| "epoch": 17.18, | |
| "grad_norm": 1.1156178712844849, | |
| "learning_rate": 6.624422110552764e-06, | |
| "loss": 0.0015, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 17.19, | |
| "grad_norm": 0.447689026594162, | |
| "learning_rate": 6.621909547738694e-06, | |
| "loss": 0.0016, | |
| "step": 34125 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "grad_norm": 0.7558609247207642, | |
| "learning_rate": 6.6193969849246235e-06, | |
| "loss": 0.0016, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 17.22, | |
| "grad_norm": 0.8499734997749329, | |
| "learning_rate": 6.616884422110554e-06, | |
| "loss": 0.0017, | |
| "step": 34175 | |
| }, | |
| { | |
| "epoch": 17.23, | |
| "grad_norm": 0.40783509612083435, | |
| "learning_rate": 6.614371859296484e-06, | |
| "loss": 0.002, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 17.24, | |
| "grad_norm": 1.5999126434326172, | |
| "learning_rate": 6.611859296482412e-06, | |
| "loss": 0.002, | |
| "step": 34225 | |
| }, | |
| { | |
| "epoch": 17.25, | |
| "grad_norm": 0.852052628993988, | |
| "learning_rate": 6.6093467336683424e-06, | |
| "loss": 0.0019, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 17.27, | |
| "grad_norm": 0.36311525106430054, | |
| "learning_rate": 6.606834170854272e-06, | |
| "loss": 0.0017, | |
| "step": 34275 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "grad_norm": 2.138871669769287, | |
| "learning_rate": 6.604321608040202e-06, | |
| "loss": 0.0016, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "grad_norm": 1.056746482849121, | |
| "learning_rate": 6.60180904522613e-06, | |
| "loss": 0.0015, | |
| "step": 34325 | |
| }, | |
| { | |
| "epoch": 17.3, | |
| "grad_norm": 0.8301752805709839, | |
| "learning_rate": 6.599296482412061e-06, | |
| "loss": 0.002, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 17.32, | |
| "grad_norm": 1.781783938407898, | |
| "learning_rate": 6.596783919597991e-06, | |
| "loss": 0.0017, | |
| "step": 34375 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "grad_norm": 1.2563107013702393, | |
| "learning_rate": 6.59427135678392e-06, | |
| "loss": 0.0017, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "grad_norm": 2.3324105739593506, | |
| "learning_rate": 6.59175879396985e-06, | |
| "loss": 0.002, | |
| "step": 34425 | |
| }, | |
| { | |
| "epoch": 17.36, | |
| "grad_norm": 1.0351413488388062, | |
| "learning_rate": 6.5892462311557796e-06, | |
| "loss": 0.002, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 17.37, | |
| "grad_norm": 1.9729125499725342, | |
| "learning_rate": 6.58673366834171e-06, | |
| "loss": 0.0019, | |
| "step": 34475 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "grad_norm": 0.7360727787017822, | |
| "learning_rate": 6.584221105527638e-06, | |
| "loss": 0.0016, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "grad_norm": 1.5218764543533325, | |
| "learning_rate": 6.581708542713568e-06, | |
| "loss": 0.0017, | |
| "step": 34525 | |
| }, | |
| { | |
| "epoch": 17.41, | |
| "grad_norm": 0.9143256545066833, | |
| "learning_rate": 6.579195979899498e-06, | |
| "loss": 0.002, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 17.42, | |
| "grad_norm": 1.5911108255386353, | |
| "learning_rate": 6.576683417085428e-06, | |
| "loss": 0.0021, | |
| "step": 34575 | |
| }, | |
| { | |
| "epoch": 17.43, | |
| "grad_norm": 1.1945171356201172, | |
| "learning_rate": 6.574170854271358e-06, | |
| "loss": 0.0021, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 17.44, | |
| "grad_norm": 0.6065207719802856, | |
| "learning_rate": 6.5716582914572865e-06, | |
| "loss": 0.0015, | |
| "step": 34625 | |
| }, | |
| { | |
| "epoch": 17.46, | |
| "grad_norm": 1.3287162780761719, | |
| "learning_rate": 6.569145728643217e-06, | |
| "loss": 0.0015, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 17.47, | |
| "grad_norm": 1.1048755645751953, | |
| "learning_rate": 6.566633165829146e-06, | |
| "loss": 0.0016, | |
| "step": 34675 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "grad_norm": 1.8290241956710815, | |
| "learning_rate": 6.564120603015076e-06, | |
| "loss": 0.0017, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 17.49, | |
| "grad_norm": 0.7153010964393616, | |
| "learning_rate": 6.5616080402010055e-06, | |
| "loss": 0.0017, | |
| "step": 34725 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "grad_norm": 1.1753082275390625, | |
| "learning_rate": 6.559095477386936e-06, | |
| "loss": 0.0019, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 17.52, | |
| "grad_norm": 0.8656060695648193, | |
| "learning_rate": 6.556582914572864e-06, | |
| "loss": 0.0018, | |
| "step": 34775 | |
| }, | |
| { | |
| "epoch": 17.53, | |
| "grad_norm": 0.9170093536376953, | |
| "learning_rate": 6.554070351758794e-06, | |
| "loss": 0.0017, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 17.54, | |
| "grad_norm": 0.8003792762756348, | |
| "learning_rate": 6.5515577889447245e-06, | |
| "loss": 0.0016, | |
| "step": 34825 | |
| }, | |
| { | |
| "epoch": 17.56, | |
| "grad_norm": 0.9868853092193604, | |
| "learning_rate": 6.549145728643217e-06, | |
| "loss": 0.0015, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 17.57, | |
| "grad_norm": 1.0430176258087158, | |
| "learning_rate": 6.546633165829146e-06, | |
| "loss": 0.0018, | |
| "step": 34875 | |
| }, | |
| { | |
| "epoch": 17.58, | |
| "grad_norm": 0.36971691250801086, | |
| "learning_rate": 6.544120603015076e-06, | |
| "loss": 0.0018, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 17.59, | |
| "grad_norm": 0.87406325340271, | |
| "learning_rate": 6.541608040201005e-06, | |
| "loss": 0.002, | |
| "step": 34925 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "grad_norm": 2.4083333015441895, | |
| "learning_rate": 6.539095477386935e-06, | |
| "loss": 0.002, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 17.62, | |
| "grad_norm": 0.49866533279418945, | |
| "learning_rate": 6.536582914572864e-06, | |
| "loss": 0.0021, | |
| "step": 34975 | |
| }, | |
| { | |
| "epoch": 17.63, | |
| "grad_norm": 0.744525134563446, | |
| "learning_rate": 6.5340703517587945e-06, | |
| "loss": 0.0021, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 17.63, | |
| "eval_loss": 0.3279932141304016, | |
| "eval_runtime": 640.0538, | |
| "eval_samples_per_second": 2.201, | |
| "eval_steps_per_second": 2.201, | |
| "eval_wer": 23.251470079557247, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 17.64, | |
| "grad_norm": 1.2228732109069824, | |
| "learning_rate": 6.531557788944725e-06, | |
| "loss": 0.0018, | |
| "step": 35025 | |
| }, | |
| { | |
| "epoch": 17.66, | |
| "grad_norm": 2.64949631690979, | |
| "learning_rate": 6.529045226130654e-06, | |
| "loss": 0.0019, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 17.67, | |
| "grad_norm": 0.8812341094017029, | |
| "learning_rate": 6.526532663316583e-06, | |
| "loss": 0.0016, | |
| "step": 35075 | |
| }, | |
| { | |
| "epoch": 17.68, | |
| "grad_norm": 1.3396104574203491, | |
| "learning_rate": 6.524020100502513e-06, | |
| "loss": 0.0017, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 17.7, | |
| "grad_norm": 0.6547167897224426, | |
| "learning_rate": 6.521507537688443e-06, | |
| "loss": 0.0019, | |
| "step": 35125 | |
| }, | |
| { | |
| "epoch": 17.71, | |
| "grad_norm": 1.9075217247009277, | |
| "learning_rate": 6.518994974874372e-06, | |
| "loss": 0.0017, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 17.72, | |
| "grad_norm": 1.7751950025558472, | |
| "learning_rate": 6.516482412060302e-06, | |
| "loss": 0.0021, | |
| "step": 35175 | |
| }, | |
| { | |
| "epoch": 17.73, | |
| "grad_norm": 1.1410751342773438, | |
| "learning_rate": 6.514070351758795e-06, | |
| "loss": 0.0021, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 17.75, | |
| "grad_norm": 0.8662394285202026, | |
| "learning_rate": 6.511557788944725e-06, | |
| "loss": 0.0018, | |
| "step": 35225 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "grad_norm": 1.586671233177185, | |
| "learning_rate": 6.509045226130653e-06, | |
| "loss": 0.0025, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "grad_norm": 1.0892744064331055, | |
| "learning_rate": 6.5065326633165835e-06, | |
| "loss": 0.0024, | |
| "step": 35275 | |
| }, | |
| { | |
| "epoch": 17.78, | |
| "grad_norm": 0.37060225009918213, | |
| "learning_rate": 6.504020100502513e-06, | |
| "loss": 0.0021, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 17.8, | |
| "grad_norm": 0.7521613240242004, | |
| "learning_rate": 6.501507537688443e-06, | |
| "loss": 0.0021, | |
| "step": 35325 | |
| }, | |
| { | |
| "epoch": 17.81, | |
| "grad_norm": 2.0023598670959473, | |
| "learning_rate": 6.4989949748743715e-06, | |
| "loss": 0.0017, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 17.82, | |
| "grad_norm": 1.1638795137405396, | |
| "learning_rate": 6.496482412060302e-06, | |
| "loss": 0.0019, | |
| "step": 35375 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "grad_norm": 1.654910922050476, | |
| "learning_rate": 6.493969849246232e-06, | |
| "loss": 0.002, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "grad_norm": 1.0789589881896973, | |
| "learning_rate": 6.491457286432161e-06, | |
| "loss": 0.0022, | |
| "step": 35425 | |
| }, | |
| { | |
| "epoch": 17.86, | |
| "grad_norm": 1.5805654525756836, | |
| "learning_rate": 6.488944723618091e-06, | |
| "loss": 0.002, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 17.87, | |
| "grad_norm": 1.393417239189148, | |
| "learning_rate": 6.486432160804021e-06, | |
| "loss": 0.0019, | |
| "step": 35475 | |
| }, | |
| { | |
| "epoch": 17.88, | |
| "grad_norm": 1.4259059429168701, | |
| "learning_rate": 6.483919597989951e-06, | |
| "loss": 0.0018, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "grad_norm": 1.371140956878662, | |
| "learning_rate": 6.481407035175879e-06, | |
| "loss": 0.0019, | |
| "step": 35525 | |
| }, | |
| { | |
| "epoch": 17.91, | |
| "grad_norm": 1.099618673324585, | |
| "learning_rate": 6.4788944723618095e-06, | |
| "loss": 0.0019, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 17.92, | |
| "grad_norm": 0.654387354850769, | |
| "learning_rate": 6.476381909547739e-06, | |
| "loss": 0.0019, | |
| "step": 35575 | |
| }, | |
| { | |
| "epoch": 17.93, | |
| "grad_norm": 1.3511158227920532, | |
| "learning_rate": 6.473869346733669e-06, | |
| "loss": 0.0017, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 17.95, | |
| "grad_norm": 1.1146668195724487, | |
| "learning_rate": 6.471356783919599e-06, | |
| "loss": 0.0019, | |
| "step": 35625 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "grad_norm": 1.2822802066802979, | |
| "learning_rate": 6.468844221105528e-06, | |
| "loss": 0.0019, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 17.97, | |
| "grad_norm": 0.6118746399879456, | |
| "learning_rate": 6.466331658291458e-06, | |
| "loss": 0.0021, | |
| "step": 35675 | |
| }, | |
| { | |
| "epoch": 17.98, | |
| "grad_norm": 1.6765689849853516, | |
| "learning_rate": 6.463819095477387e-06, | |
| "loss": 0.002, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.8929309844970703, | |
| "learning_rate": 6.461306532663317e-06, | |
| "loss": 0.002, | |
| "step": 35725 | |
| }, | |
| { | |
| "epoch": 18.01, | |
| "grad_norm": 0.8236270546913147, | |
| "learning_rate": 6.458793969849247e-06, | |
| "loss": 0.0014, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "grad_norm": 0.3765973150730133, | |
| "learning_rate": 6.456281407035177e-06, | |
| "loss": 0.001, | |
| "step": 35775 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "grad_norm": 0.7577652931213379, | |
| "learning_rate": 6.453768844221107e-06, | |
| "loss": 0.0009, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 18.05, | |
| "grad_norm": 1.3990800380706787, | |
| "learning_rate": 6.451256281407035e-06, | |
| "loss": 0.0012, | |
| "step": 35825 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "grad_norm": 0.6104835271835327, | |
| "learning_rate": 6.448743718592966e-06, | |
| "loss": 0.0017, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 18.07, | |
| "grad_norm": 0.8785896301269531, | |
| "learning_rate": 6.446231155778895e-06, | |
| "loss": 0.0014, | |
| "step": 35875 | |
| }, | |
| { | |
| "epoch": 18.09, | |
| "grad_norm": 0.3868306875228882, | |
| "learning_rate": 6.443718592964825e-06, | |
| "loss": 0.0014, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 18.1, | |
| "grad_norm": 1.184727430343628, | |
| "learning_rate": 6.4412060301507536e-06, | |
| "loss": 0.0012, | |
| "step": 35925 | |
| }, | |
| { | |
| "epoch": 18.11, | |
| "grad_norm": 1.4710326194763184, | |
| "learning_rate": 6.438693467336684e-06, | |
| "loss": 0.0015, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 18.12, | |
| "grad_norm": 0.4654022753238678, | |
| "learning_rate": 6.436180904522613e-06, | |
| "loss": 0.0011, | |
| "step": 35975 | |
| }, | |
| { | |
| "epoch": 18.14, | |
| "grad_norm": 0.30787691473960876, | |
| "learning_rate": 6.433668341708543e-06, | |
| "loss": 0.0017, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 18.14, | |
| "eval_loss": 0.3324070870876312, | |
| "eval_runtime": 779.64, | |
| "eval_samples_per_second": 1.807, | |
| "eval_steps_per_second": 1.807, | |
| "eval_wer": 23.583535108958838, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "grad_norm": 2.8175275325775146, | |
| "learning_rate": 6.431155778894473e-06, | |
| "loss": 0.0015, | |
| "step": 36025 | |
| }, | |
| { | |
| "epoch": 18.16, | |
| "grad_norm": 1.644282579421997, | |
| "learning_rate": 6.428643216080403e-06, | |
| "loss": 0.0017, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 18.17, | |
| "grad_norm": 1.2739876508712769, | |
| "learning_rate": 6.426130653266333e-06, | |
| "loss": 0.0016, | |
| "step": 36075 | |
| }, | |
| { | |
| "epoch": 18.19, | |
| "grad_norm": 1.1111208200454712, | |
| "learning_rate": 6.423618090452261e-06, | |
| "loss": 0.0011, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 18.2, | |
| "grad_norm": 0.3262185752391815, | |
| "learning_rate": 6.4211055276381915e-06, | |
| "loss": 0.0012, | |
| "step": 36125 | |
| }, | |
| { | |
| "epoch": 18.21, | |
| "grad_norm": 1.090649962425232, | |
| "learning_rate": 6.418592964824121e-06, | |
| "loss": 0.0019, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 18.22, | |
| "grad_norm": 0.6180118322372437, | |
| "learning_rate": 6.416080402010051e-06, | |
| "loss": 0.0016, | |
| "step": 36175 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "grad_norm": 1.0317612886428833, | |
| "learning_rate": 6.4135678391959795e-06, | |
| "loss": 0.0019, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 18.25, | |
| "grad_norm": 0.2697906494140625, | |
| "learning_rate": 6.41105527638191e-06, | |
| "loss": 0.0015, | |
| "step": 36225 | |
| }, | |
| { | |
| "epoch": 18.26, | |
| "grad_norm": 1.2288458347320557, | |
| "learning_rate": 6.40854271356784e-06, | |
| "loss": 0.0016, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 18.27, | |
| "grad_norm": 1.6531765460968018, | |
| "learning_rate": 6.406030150753769e-06, | |
| "loss": 0.0013, | |
| "step": 36275 | |
| }, | |
| { | |
| "epoch": 18.29, | |
| "grad_norm": 0.38945141434669495, | |
| "learning_rate": 6.403517587939699e-06, | |
| "loss": 0.0014, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 18.3, | |
| "grad_norm": 0.8794446587562561, | |
| "learning_rate": 6.401005025125629e-06, | |
| "loss": 0.0013, | |
| "step": 36325 | |
| }, | |
| { | |
| "epoch": 18.31, | |
| "grad_norm": 0.6100822687149048, | |
| "learning_rate": 6.398492462311559e-06, | |
| "loss": 0.0016, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 18.32, | |
| "grad_norm": 1.371356725692749, | |
| "learning_rate": 6.395979899497487e-06, | |
| "loss": 0.0017, | |
| "step": 36375 | |
| }, | |
| { | |
| "epoch": 18.34, | |
| "grad_norm": 0.791754424571991, | |
| "learning_rate": 6.3934673366834175e-06, | |
| "loss": 0.0014, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 18.35, | |
| "grad_norm": 1.5653916597366333, | |
| "learning_rate": 6.390954773869347e-06, | |
| "loss": 0.0017, | |
| "step": 36425 | |
| }, | |
| { | |
| "epoch": 18.36, | |
| "grad_norm": 0.2556110620498657, | |
| "learning_rate": 6.388442211055277e-06, | |
| "loss": 0.0017, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 18.38, | |
| "grad_norm": 0.7103545665740967, | |
| "learning_rate": 6.385929648241207e-06, | |
| "loss": 0.0016, | |
| "step": 36475 | |
| }, | |
| { | |
| "epoch": 18.39, | |
| "grad_norm": 1.2815943956375122, | |
| "learning_rate": 6.383417085427136e-06, | |
| "loss": 0.0019, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "grad_norm": 0.8965465426445007, | |
| "learning_rate": 6.380904522613066e-06, | |
| "loss": 0.0019, | |
| "step": 36525 | |
| }, | |
| { | |
| "epoch": 18.41, | |
| "grad_norm": 1.0410230159759521, | |
| "learning_rate": 6.378391959798995e-06, | |
| "loss": 0.0015, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 18.43, | |
| "grad_norm": 1.2315019369125366, | |
| "learning_rate": 6.375879396984925e-06, | |
| "loss": 0.0015, | |
| "step": 36575 | |
| }, | |
| { | |
| "epoch": 18.44, | |
| "grad_norm": 0.7894676327705383, | |
| "learning_rate": 6.373366834170855e-06, | |
| "loss": 0.0012, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 18.45, | |
| "grad_norm": 1.8946139812469482, | |
| "learning_rate": 6.370854271356785e-06, | |
| "loss": 0.0016, | |
| "step": 36625 | |
| }, | |
| { | |
| "epoch": 18.46, | |
| "grad_norm": 0.4600735306739807, | |
| "learning_rate": 6.368341708542715e-06, | |
| "loss": 0.002, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "grad_norm": 1.5674549341201782, | |
| "learning_rate": 6.365829145728643e-06, | |
| "loss": 0.0018, | |
| "step": 36675 | |
| }, | |
| { | |
| "epoch": 18.49, | |
| "grad_norm": 0.8131008744239807, | |
| "learning_rate": 6.363316582914574e-06, | |
| "loss": 0.0018, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 18.5, | |
| "grad_norm": 0.6329362392425537, | |
| "learning_rate": 6.360804020100503e-06, | |
| "loss": 0.0019, | |
| "step": 36725 | |
| }, | |
| { | |
| "epoch": 18.51, | |
| "grad_norm": 0.8405249714851379, | |
| "learning_rate": 6.358291457286433e-06, | |
| "loss": 0.0017, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 18.53, | |
| "grad_norm": 1.6821061372756958, | |
| "learning_rate": 6.3557788944723616e-06, | |
| "loss": 0.0014, | |
| "step": 36775 | |
| }, | |
| { | |
| "epoch": 18.54, | |
| "grad_norm": 0.9368191361427307, | |
| "learning_rate": 6.353266331658292e-06, | |
| "loss": 0.0014, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 18.55, | |
| "grad_norm": 1.3366914987564087, | |
| "learning_rate": 6.350753768844221e-06, | |
| "loss": 0.0014, | |
| "step": 36825 | |
| }, | |
| { | |
| "epoch": 18.56, | |
| "grad_norm": 1.3093204498291016, | |
| "learning_rate": 6.348241206030151e-06, | |
| "loss": 0.0018, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 18.58, | |
| "grad_norm": 1.0798649787902832, | |
| "learning_rate": 6.345728643216081e-06, | |
| "loss": 0.0016, | |
| "step": 36875 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "grad_norm": 0.4596012234687805, | |
| "learning_rate": 6.343216080402011e-06, | |
| "loss": 0.0016, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "grad_norm": 1.961551547050476, | |
| "learning_rate": 6.340703517587941e-06, | |
| "loss": 0.0017, | |
| "step": 36925 | |
| }, | |
| { | |
| "epoch": 18.61, | |
| "grad_norm": 1.54167640209198, | |
| "learning_rate": 6.338190954773869e-06, | |
| "loss": 0.0018, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "grad_norm": 1.0647872686386108, | |
| "learning_rate": 6.3356783919597995e-06, | |
| "loss": 0.0015, | |
| "step": 36975 | |
| }, | |
| { | |
| "epoch": 18.64, | |
| "grad_norm": 0.15062101185321808, | |
| "learning_rate": 6.333165829145729e-06, | |
| "loss": 0.0013, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 18.64, | |
| "eval_loss": 0.33555132150650024, | |
| "eval_runtime": 644.609, | |
| "eval_samples_per_second": 2.186, | |
| "eval_steps_per_second": 2.186, | |
| "eval_wer": 23.445174680041507, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 18.65, | |
| "grad_norm": 0.6003009080886841, | |
| "learning_rate": 6.330653266331659e-06, | |
| "loss": 0.0015, | |
| "step": 37025 | |
| }, | |
| { | |
| "epoch": 18.66, | |
| "grad_norm": 0.7250798344612122, | |
| "learning_rate": 6.3281407035175875e-06, | |
| "loss": 0.0016, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 18.68, | |
| "grad_norm": 0.7910952568054199, | |
| "learning_rate": 6.325628140703518e-06, | |
| "loss": 0.0016, | |
| "step": 37075 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "grad_norm": 1.4833486080169678, | |
| "learning_rate": 6.323115577889448e-06, | |
| "loss": 0.002, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "grad_norm": 0.8942164182662964, | |
| "learning_rate": 6.320603015075377e-06, | |
| "loss": 0.0018, | |
| "step": 37125 | |
| }, | |
| { | |
| "epoch": 18.72, | |
| "grad_norm": 0.8438106179237366, | |
| "learning_rate": 6.318090452261307e-06, | |
| "loss": 0.002, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 18.73, | |
| "grad_norm": 1.0023553371429443, | |
| "learning_rate": 6.315577889447237e-06, | |
| "loss": 0.0013, | |
| "step": 37175 | |
| }, | |
| { | |
| "epoch": 18.74, | |
| "grad_norm": 0.8116686940193176, | |
| "learning_rate": 6.313065326633167e-06, | |
| "loss": 0.0015, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 18.75, | |
| "grad_norm": 1.0903185606002808, | |
| "learning_rate": 6.310552763819095e-06, | |
| "loss": 0.0016, | |
| "step": 37225 | |
| }, | |
| { | |
| "epoch": 18.77, | |
| "grad_norm": 1.1223067045211792, | |
| "learning_rate": 6.3080402010050255e-06, | |
| "loss": 0.0018, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 18.78, | |
| "grad_norm": 1.5012390613555908, | |
| "learning_rate": 6.305527638190956e-06, | |
| "loss": 0.0018, | |
| "step": 37275 | |
| }, | |
| { | |
| "epoch": 18.79, | |
| "grad_norm": 1.3460817337036133, | |
| "learning_rate": 6.303015075376885e-06, | |
| "loss": 0.0019, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "grad_norm": 1.7468082904815674, | |
| "learning_rate": 6.300502512562815e-06, | |
| "loss": 0.0019, | |
| "step": 37325 | |
| }, | |
| { | |
| "epoch": 18.82, | |
| "grad_norm": 0.5250969529151917, | |
| "learning_rate": 6.297989949748744e-06, | |
| "loss": 0.0017, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 18.83, | |
| "grad_norm": 0.2302069365978241, | |
| "learning_rate": 6.295477386934674e-06, | |
| "loss": 0.0013, | |
| "step": 37375 | |
| }, | |
| { | |
| "epoch": 18.84, | |
| "grad_norm": 1.5310719013214111, | |
| "learning_rate": 6.292964824120603e-06, | |
| "loss": 0.0015, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 18.85, | |
| "grad_norm": 1.8044565916061401, | |
| "learning_rate": 6.290452261306533e-06, | |
| "loss": 0.0014, | |
| "step": 37425 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "grad_norm": 0.8181155920028687, | |
| "learning_rate": 6.287939698492463e-06, | |
| "loss": 0.0014, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "grad_norm": 0.8504043817520142, | |
| "learning_rate": 6.285427135678393e-06, | |
| "loss": 0.0018, | |
| "step": 37475 | |
| }, | |
| { | |
| "epoch": 18.89, | |
| "grad_norm": 0.27127495408058167, | |
| "learning_rate": 6.282914572864323e-06, | |
| "loss": 0.0016, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 18.9, | |
| "grad_norm": 1.2492486238479614, | |
| "learning_rate": 6.280402010050251e-06, | |
| "loss": 0.0015, | |
| "step": 37525 | |
| }, | |
| { | |
| "epoch": 18.92, | |
| "grad_norm": 1.6787339448928833, | |
| "learning_rate": 6.277889447236182e-06, | |
| "loss": 0.0017, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 18.93, | |
| "grad_norm": 2.014810800552368, | |
| "learning_rate": 6.275376884422111e-06, | |
| "loss": 0.002, | |
| "step": 37575 | |
| }, | |
| { | |
| "epoch": 18.94, | |
| "grad_norm": 0.9987440705299377, | |
| "learning_rate": 6.272864321608041e-06, | |
| "loss": 0.002, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 18.95, | |
| "grad_norm": 0.6803994178771973, | |
| "learning_rate": 6.2703517587939696e-06, | |
| "loss": 0.0021, | |
| "step": 37625 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "grad_norm": 0.8169840574264526, | |
| "learning_rate": 6.2678391959799e-06, | |
| "loss": 0.0022, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 18.98, | |
| "grad_norm": 1.3978486061096191, | |
| "learning_rate": 6.265326633165829e-06, | |
| "loss": 0.0018, | |
| "step": 37675 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "grad_norm": 1.5592775344848633, | |
| "learning_rate": 6.262814070351759e-06, | |
| "loss": 0.0019, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 19.01, | |
| "grad_norm": 1.0616681575775146, | |
| "learning_rate": 6.260301507537689e-06, | |
| "loss": 0.0017, | |
| "step": 37725 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "grad_norm": 0.9332436919212341, | |
| "learning_rate": 6.257788944723619e-06, | |
| "loss": 0.0014, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 19.03, | |
| "grad_norm": 1.051811933517456, | |
| "learning_rate": 6.255276381909549e-06, | |
| "loss": 0.0013, | |
| "step": 37775 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "grad_norm": 1.1693936586380005, | |
| "learning_rate": 6.252763819095477e-06, | |
| "loss": 0.0014, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 19.06, | |
| "grad_norm": 1.9111791849136353, | |
| "learning_rate": 6.2502512562814075e-06, | |
| "loss": 0.0014, | |
| "step": 37825 | |
| }, | |
| { | |
| "epoch": 19.07, | |
| "grad_norm": 0.4390527904033661, | |
| "learning_rate": 6.247738693467337e-06, | |
| "loss": 0.0012, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 19.08, | |
| "grad_norm": 2.4373393058776855, | |
| "learning_rate": 6.245226130653267e-06, | |
| "loss": 0.0013, | |
| "step": 37875 | |
| }, | |
| { | |
| "epoch": 19.09, | |
| "grad_norm": 0.6409306526184082, | |
| "learning_rate": 6.242713567839197e-06, | |
| "loss": 0.0014, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 19.11, | |
| "grad_norm": 0.4275980293750763, | |
| "learning_rate": 6.240201005025126e-06, | |
| "loss": 0.0011, | |
| "step": 37925 | |
| }, | |
| { | |
| "epoch": 19.12, | |
| "grad_norm": 0.36015447974205017, | |
| "learning_rate": 6.237688442211056e-06, | |
| "loss": 0.0011, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "grad_norm": 0.4539172351360321, | |
| "learning_rate": 6.235175879396985e-06, | |
| "loss": 0.0012, | |
| "step": 37975 | |
| }, | |
| { | |
| "epoch": 19.14, | |
| "grad_norm": 0.5768988132476807, | |
| "learning_rate": 6.232663316582915e-06, | |
| "loss": 0.001, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 19.14, | |
| "eval_loss": 0.3325794041156769, | |
| "eval_runtime": 648.1138, | |
| "eval_samples_per_second": 2.174, | |
| "eval_steps_per_second": 2.174, | |
| "eval_wer": 23.12694569353165, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "grad_norm": 1.9239482879638672, | |
| "learning_rate": 6.230150753768845e-06, | |
| "loss": 0.0013, | |
| "step": 38025 | |
| }, | |
| { | |
| "epoch": 19.17, | |
| "grad_norm": 0.30082157254219055, | |
| "learning_rate": 6.227638190954775e-06, | |
| "loss": 0.001, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 19.18, | |
| "grad_norm": 0.20353496074676514, | |
| "learning_rate": 6.225125628140703e-06, | |
| "loss": 0.0011, | |
| "step": 38075 | |
| }, | |
| { | |
| "epoch": 19.19, | |
| "grad_norm": 0.6615707278251648, | |
| "learning_rate": 6.2226130653266335e-06, | |
| "loss": 0.0012, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 19.21, | |
| "grad_norm": 1.4539945125579834, | |
| "learning_rate": 6.220100502512564e-06, | |
| "loss": 0.0012, | |
| "step": 38125 | |
| }, | |
| { | |
| "epoch": 19.22, | |
| "grad_norm": 0.6011916399002075, | |
| "learning_rate": 6.217587939698493e-06, | |
| "loss": 0.0012, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 19.23, | |
| "grad_norm": 0.29558372497558594, | |
| "learning_rate": 6.215075376884423e-06, | |
| "loss": 0.0014, | |
| "step": 38175 | |
| }, | |
| { | |
| "epoch": 19.24, | |
| "grad_norm": 1.2541766166687012, | |
| "learning_rate": 6.212562814070352e-06, | |
| "loss": 0.0011, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 19.26, | |
| "grad_norm": 2.3364927768707275, | |
| "learning_rate": 6.210050251256282e-06, | |
| "loss": 0.0017, | |
| "step": 38225 | |
| }, | |
| { | |
| "epoch": 19.27, | |
| "grad_norm": 0.5079712867736816, | |
| "learning_rate": 6.207537688442211e-06, | |
| "loss": 0.0013, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 19.28, | |
| "grad_norm": 0.9235591292381287, | |
| "learning_rate": 6.205025125628141e-06, | |
| "loss": 0.0014, | |
| "step": 38275 | |
| }, | |
| { | |
| "epoch": 19.29, | |
| "grad_norm": 0.31673333048820496, | |
| "learning_rate": 6.202512562814071e-06, | |
| "loss": 0.0013, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 19.31, | |
| "grad_norm": 1.0981833934783936, | |
| "learning_rate": 6.200000000000001e-06, | |
| "loss": 0.0014, | |
| "step": 38325 | |
| }, | |
| { | |
| "epoch": 19.32, | |
| "grad_norm": 0.6616347432136536, | |
| "learning_rate": 6.197487437185931e-06, | |
| "loss": 0.0014, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 19.33, | |
| "grad_norm": 0.5207319259643555, | |
| "learning_rate": 6.1949748743718594e-06, | |
| "loss": 0.0013, | |
| "step": 38375 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "grad_norm": 1.1700994968414307, | |
| "learning_rate": 6.19246231155779e-06, | |
| "loss": 0.0015, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 19.36, | |
| "grad_norm": 1.4399977922439575, | |
| "learning_rate": 6.189949748743719e-06, | |
| "loss": 0.0014, | |
| "step": 38425 | |
| }, | |
| { | |
| "epoch": 19.37, | |
| "grad_norm": 0.9737831354141235, | |
| "learning_rate": 6.187437185929649e-06, | |
| "loss": 0.0013, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 19.38, | |
| "grad_norm": 0.8050452470779419, | |
| "learning_rate": 6.1849246231155776e-06, | |
| "loss": 0.0011, | |
| "step": 38475 | |
| }, | |
| { | |
| "epoch": 19.4, | |
| "grad_norm": 0.5391014218330383, | |
| "learning_rate": 6.182412060301508e-06, | |
| "loss": 0.0012, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 19.41, | |
| "grad_norm": 1.5151572227478027, | |
| "learning_rate": 6.179899497487438e-06, | |
| "loss": 0.0013, | |
| "step": 38525 | |
| }, | |
| { | |
| "epoch": 19.42, | |
| "grad_norm": 0.6566374897956848, | |
| "learning_rate": 6.177386934673367e-06, | |
| "loss": 0.0012, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 19.43, | |
| "grad_norm": 0.5387280583381653, | |
| "learning_rate": 6.174874371859297e-06, | |
| "loss": 0.0014, | |
| "step": 38575 | |
| }, | |
| { | |
| "epoch": 19.45, | |
| "grad_norm": 2.2199933528900146, | |
| "learning_rate": 6.172361809045227e-06, | |
| "loss": 0.0015, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "grad_norm": 0.5629024505615234, | |
| "learning_rate": 6.169849246231157e-06, | |
| "loss": 0.0014, | |
| "step": 38625 | |
| }, | |
| { | |
| "epoch": 19.47, | |
| "grad_norm": 1.4785996675491333, | |
| "learning_rate": 6.167336683417085e-06, | |
| "loss": 0.0019, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 19.48, | |
| "grad_norm": 1.0027951002120972, | |
| "learning_rate": 6.1648241206030155e-06, | |
| "loss": 0.0016, | |
| "step": 38675 | |
| }, | |
| { | |
| "epoch": 19.5, | |
| "grad_norm": 0.8754851222038269, | |
| "learning_rate": 6.162311557788945e-06, | |
| "loss": 0.0013, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 19.51, | |
| "grad_norm": 1.2813969850540161, | |
| "learning_rate": 6.159798994974875e-06, | |
| "loss": 0.0012, | |
| "step": 38725 | |
| }, | |
| { | |
| "epoch": 19.52, | |
| "grad_norm": 0.9958238005638123, | |
| "learning_rate": 6.157286432160805e-06, | |
| "loss": 0.0012, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "grad_norm": 1.3986196517944336, | |
| "learning_rate": 6.154773869346734e-06, | |
| "loss": 0.0013, | |
| "step": 38775 | |
| }, | |
| { | |
| "epoch": 19.55, | |
| "grad_norm": 0.44907984137535095, | |
| "learning_rate": 6.152261306532664e-06, | |
| "loss": 0.0013, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 19.56, | |
| "grad_norm": 1.5967319011688232, | |
| "learning_rate": 6.149748743718593e-06, | |
| "loss": 0.0017, | |
| "step": 38825 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "grad_norm": 1.011804223060608, | |
| "learning_rate": 6.147236180904523e-06, | |
| "loss": 0.0013, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 19.58, | |
| "grad_norm": 0.6981809139251709, | |
| "learning_rate": 6.144723618090453e-06, | |
| "loss": 0.0016, | |
| "step": 38875 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "grad_norm": 1.2294851541519165, | |
| "learning_rate": 6.142211055276383e-06, | |
| "loss": 0.0017, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 19.61, | |
| "grad_norm": 0.9731518030166626, | |
| "learning_rate": 6.139698492462311e-06, | |
| "loss": 0.0016, | |
| "step": 38925 | |
| }, | |
| { | |
| "epoch": 19.62, | |
| "grad_norm": 1.7019362449645996, | |
| "learning_rate": 6.1371859296482415e-06, | |
| "loss": 0.0016, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 19.63, | |
| "grad_norm": 0.35968101024627686, | |
| "learning_rate": 6.134673366834172e-06, | |
| "loss": 0.0016, | |
| "step": 38975 | |
| }, | |
| { | |
| "epoch": 19.65, | |
| "grad_norm": 1.0781711339950562, | |
| "learning_rate": 6.132160804020101e-06, | |
| "loss": 0.0016, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 19.65, | |
| "eval_loss": 0.34026119112968445, | |
| "eval_runtime": 642.6075, | |
| "eval_samples_per_second": 2.193, | |
| "eval_steps_per_second": 2.193, | |
| "eval_wer": 23.777239709443098, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 19.66, | |
| "grad_norm": 0.6743261218070984, | |
| "learning_rate": 6.129648241206031e-06, | |
| "loss": 0.0017, | |
| "step": 39025 | |
| }, | |
| { | |
| "epoch": 19.67, | |
| "grad_norm": 1.3148083686828613, | |
| "learning_rate": 6.12713567839196e-06, | |
| "loss": 0.0014, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 19.69, | |
| "grad_norm": 1.3721927404403687, | |
| "learning_rate": 6.12462311557789e-06, | |
| "loss": 0.0014, | |
| "step": 39075 | |
| }, | |
| { | |
| "epoch": 19.7, | |
| "grad_norm": 0.8227803707122803, | |
| "learning_rate": 6.122211055276382e-06, | |
| "loss": 0.0017, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 19.71, | |
| "grad_norm": 1.0405676364898682, | |
| "learning_rate": 6.1196984924623115e-06, | |
| "loss": 0.0016, | |
| "step": 39125 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "grad_norm": 0.7169470191001892, | |
| "learning_rate": 6.117185929648242e-06, | |
| "loss": 0.0017, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 19.74, | |
| "grad_norm": 0.5327123999595642, | |
| "learning_rate": 6.114673366834172e-06, | |
| "loss": 0.0014, | |
| "step": 39175 | |
| }, | |
| { | |
| "epoch": 19.75, | |
| "grad_norm": 1.0876247882843018, | |
| "learning_rate": 6.112160804020101e-06, | |
| "loss": 0.0015, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 19.76, | |
| "grad_norm": 0.7583673596382141, | |
| "learning_rate": 6.109648241206031e-06, | |
| "loss": 0.0011, | |
| "step": 39225 | |
| }, | |
| { | |
| "epoch": 19.77, | |
| "grad_norm": 0.7003112435340881, | |
| "learning_rate": 6.10713567839196e-06, | |
| "loss": 0.0013, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "grad_norm": 0.8923580646514893, | |
| "learning_rate": 6.10462311557789e-06, | |
| "loss": 0.0018, | |
| "step": 39275 | |
| }, | |
| { | |
| "epoch": 19.8, | |
| "grad_norm": 1.0716352462768555, | |
| "learning_rate": 6.102110552763819e-06, | |
| "loss": 0.0012, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "grad_norm": 1.9225443601608276, | |
| "learning_rate": 6.0995979899497495e-06, | |
| "loss": 0.0016, | |
| "step": 39325 | |
| }, | |
| { | |
| "epoch": 19.82, | |
| "grad_norm": 0.9054650664329529, | |
| "learning_rate": 6.09708542713568e-06, | |
| "loss": 0.0016, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 19.84, | |
| "grad_norm": 0.6190009117126465, | |
| "learning_rate": 6.094572864321608e-06, | |
| "loss": 0.0012, | |
| "step": 39375 | |
| }, | |
| { | |
| "epoch": 19.85, | |
| "grad_norm": 1.1662238836288452, | |
| "learning_rate": 6.092060301507538e-06, | |
| "loss": 0.0013, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 19.86, | |
| "grad_norm": 1.0806312561035156, | |
| "learning_rate": 6.089547738693468e-06, | |
| "loss": 0.0019, | |
| "step": 39425 | |
| }, | |
| { | |
| "epoch": 19.87, | |
| "grad_norm": 0.7853173017501831, | |
| "learning_rate": 6.087035175879398e-06, | |
| "loss": 0.0015, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 19.89, | |
| "grad_norm": 0.9636842012405396, | |
| "learning_rate": 6.084522613065327e-06, | |
| "loss": 0.0014, | |
| "step": 39475 | |
| }, | |
| { | |
| "epoch": 19.9, | |
| "grad_norm": 1.7559266090393066, | |
| "learning_rate": 6.082010050251257e-06, | |
| "loss": 0.0014, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "grad_norm": 0.9466189742088318, | |
| "learning_rate": 6.079497487437186e-06, | |
| "loss": 0.0014, | |
| "step": 39525 | |
| }, | |
| { | |
| "epoch": 19.92, | |
| "grad_norm": 1.7009310722351074, | |
| "learning_rate": 6.076984924623116e-06, | |
| "loss": 0.0015, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 19.94, | |
| "grad_norm": 1.0113627910614014, | |
| "learning_rate": 6.074472361809046e-06, | |
| "loss": 0.0013, | |
| "step": 39575 | |
| }, | |
| { | |
| "epoch": 19.95, | |
| "grad_norm": 0.990430474281311, | |
| "learning_rate": 6.071959798994975e-06, | |
| "loss": 0.0016, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 19.96, | |
| "grad_norm": 1.0662728548049927, | |
| "learning_rate": 6.069447236180906e-06, | |
| "loss": 0.0017, | |
| "step": 39625 | |
| }, | |
| { | |
| "epoch": 19.97, | |
| "grad_norm": 1.152665138244629, | |
| "learning_rate": 6.066934673366834e-06, | |
| "loss": 0.0019, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 19.99, | |
| "grad_norm": 2.0811469554901123, | |
| "learning_rate": 6.064422110552764e-06, | |
| "loss": 0.0018, | |
| "step": 39675 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.3459393978118896, | |
| "learning_rate": 6.0619095477386936e-06, | |
| "loss": 0.0021, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 20.01, | |
| "grad_norm": 1.1473982334136963, | |
| "learning_rate": 6.059396984924624e-06, | |
| "loss": 0.0013, | |
| "step": 39725 | |
| }, | |
| { | |
| "epoch": 20.03, | |
| "grad_norm": 1.0483380556106567, | |
| "learning_rate": 6.056884422110553e-06, | |
| "loss": 0.0012, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 20.04, | |
| "grad_norm": 0.5056473016738892, | |
| "learning_rate": 6.054371859296483e-06, | |
| "loss": 0.0015, | |
| "step": 39775 | |
| }, | |
| { | |
| "epoch": 20.05, | |
| "grad_norm": 0.9125507473945618, | |
| "learning_rate": 6.051859296482413e-06, | |
| "loss": 0.0013, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 20.06, | |
| "grad_norm": 0.17925478518009186, | |
| "learning_rate": 6.049346733668342e-06, | |
| "loss": 0.0013, | |
| "step": 39825 | |
| }, | |
| { | |
| "epoch": 20.08, | |
| "grad_norm": 0.492924302816391, | |
| "learning_rate": 6.046834170854272e-06, | |
| "loss": 0.0013, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 20.09, | |
| "grad_norm": 1.4264193773269653, | |
| "learning_rate": 6.044321608040201e-06, | |
| "loss": 0.0014, | |
| "step": 39875 | |
| }, | |
| { | |
| "epoch": 20.1, | |
| "grad_norm": 0.6481070518493652, | |
| "learning_rate": 6.0418090452261315e-06, | |
| "loss": 0.0014, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 20.11, | |
| "grad_norm": 0.9014895558357239, | |
| "learning_rate": 6.03929648241206e-06, | |
| "loss": 0.0014, | |
| "step": 39925 | |
| }, | |
| { | |
| "epoch": 20.13, | |
| "grad_norm": 1.6231021881103516, | |
| "learning_rate": 6.03678391959799e-06, | |
| "loss": 0.0014, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 20.14, | |
| "grad_norm": 0.2752940058708191, | |
| "learning_rate": 6.03427135678392e-06, | |
| "loss": 0.0009, | |
| "step": 39975 | |
| }, | |
| { | |
| "epoch": 20.15, | |
| "grad_norm": 0.902050256729126, | |
| "learning_rate": 6.03175879396985e-06, | |
| "loss": 0.0009, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 20.15, | |
| "eval_loss": 0.3369000256061554, | |
| "eval_runtime": 648.606, | |
| "eval_samples_per_second": 2.172, | |
| "eval_steps_per_second": 2.172, | |
| "eval_wer": 23.24455205811138, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 20.16, | |
| "grad_norm": 0.18782569468021393, | |
| "learning_rate": 6.02924623115578e-06, | |
| "loss": 0.0007, | |
| "step": 40025 | |
| }, | |
| { | |
| "epoch": 20.18, | |
| "grad_norm": 0.6005980372428894, | |
| "learning_rate": 6.026733668341709e-06, | |
| "loss": 0.0008, | |
| "step": 40050 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "grad_norm": 0.7301942110061646, | |
| "learning_rate": 6.024221105527639e-06, | |
| "loss": 0.0011, | |
| "step": 40075 | |
| }, | |
| { | |
| "epoch": 20.2, | |
| "grad_norm": 0.4620230793952942, | |
| "learning_rate": 6.021708542713568e-06, | |
| "loss": 0.0012, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 20.21, | |
| "grad_norm": 0.23639623820781708, | |
| "learning_rate": 6.019195979899498e-06, | |
| "loss": 0.001, | |
| "step": 40125 | |
| }, | |
| { | |
| "epoch": 20.23, | |
| "grad_norm": 1.1007659435272217, | |
| "learning_rate": 6.016683417085427e-06, | |
| "loss": 0.0011, | |
| "step": 40150 | |
| }, | |
| { | |
| "epoch": 20.24, | |
| "grad_norm": 0.7579511404037476, | |
| "learning_rate": 6.0141708542713575e-06, | |
| "loss": 0.0013, | |
| "step": 40175 | |
| }, | |
| { | |
| "epoch": 20.25, | |
| "grad_norm": 0.17022021114826202, | |
| "learning_rate": 6.011658291457288e-06, | |
| "loss": 0.001, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 20.26, | |
| "grad_norm": 0.919007420539856, | |
| "learning_rate": 6.009145728643216e-06, | |
| "loss": 0.0013, | |
| "step": 40225 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "grad_norm": 0.8233655691146851, | |
| "learning_rate": 6.006633165829146e-06, | |
| "loss": 0.0011, | |
| "step": 40250 | |
| }, | |
| { | |
| "epoch": 20.29, | |
| "grad_norm": 0.6930840611457825, | |
| "learning_rate": 6.004120603015076e-06, | |
| "loss": 0.0012, | |
| "step": 40275 | |
| }, | |
| { | |
| "epoch": 20.3, | |
| "grad_norm": 0.4709855616092682, | |
| "learning_rate": 6.001608040201006e-06, | |
| "loss": 0.0018, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 20.31, | |
| "grad_norm": 0.2110186368227005, | |
| "learning_rate": 5.999095477386935e-06, | |
| "loss": 0.0012, | |
| "step": 40325 | |
| }, | |
| { | |
| "epoch": 20.33, | |
| "grad_norm": 0.48267343640327454, | |
| "learning_rate": 5.996582914572865e-06, | |
| "loss": 0.0012, | |
| "step": 40350 | |
| }, | |
| { | |
| "epoch": 20.34, | |
| "grad_norm": 0.6853476762771606, | |
| "learning_rate": 5.994070351758794e-06, | |
| "loss": 0.0012, | |
| "step": 40375 | |
| }, | |
| { | |
| "epoch": 20.35, | |
| "grad_norm": 0.9809117317199707, | |
| "learning_rate": 5.991557788944724e-06, | |
| "loss": 0.0012, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 20.37, | |
| "grad_norm": 1.3809919357299805, | |
| "learning_rate": 5.989045226130654e-06, | |
| "loss": 0.0015, | |
| "step": 40425 | |
| }, | |
| { | |
| "epoch": 20.38, | |
| "grad_norm": 1.5639605522155762, | |
| "learning_rate": 5.9865326633165834e-06, | |
| "loss": 0.002, | |
| "step": 40450 | |
| }, | |
| { | |
| "epoch": 20.39, | |
| "grad_norm": 1.0514106750488281, | |
| "learning_rate": 5.984020100502514e-06, | |
| "loss": 0.0015, | |
| "step": 40475 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "grad_norm": 0.9717534780502319, | |
| "learning_rate": 5.981507537688442e-06, | |
| "loss": 0.0017, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 20.42, | |
| "grad_norm": 1.0924715995788574, | |
| "learning_rate": 5.978994974874372e-06, | |
| "loss": 0.0012, | |
| "step": 40525 | |
| }, | |
| { | |
| "epoch": 20.43, | |
| "grad_norm": 1.3465650081634521, | |
| "learning_rate": 5.9764824120603016e-06, | |
| "loss": 0.0011, | |
| "step": 40550 | |
| }, | |
| { | |
| "epoch": 20.44, | |
| "grad_norm": 0.6348648071289062, | |
| "learning_rate": 5.973969849246232e-06, | |
| "loss": 0.0016, | |
| "step": 40575 | |
| }, | |
| { | |
| "epoch": 20.45, | |
| "grad_norm": 1.0228688716888428, | |
| "learning_rate": 5.971457286432162e-06, | |
| "loss": 0.0011, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 20.47, | |
| "grad_norm": 1.5299664735794067, | |
| "learning_rate": 5.968944723618091e-06, | |
| "loss": 0.0012, | |
| "step": 40625 | |
| }, | |
| { | |
| "epoch": 20.48, | |
| "grad_norm": 1.601320743560791, | |
| "learning_rate": 5.966432160804021e-06, | |
| "loss": 0.0014, | |
| "step": 40650 | |
| }, | |
| { | |
| "epoch": 20.49, | |
| "grad_norm": 0.6638547778129578, | |
| "learning_rate": 5.96391959798995e-06, | |
| "loss": 0.0014, | |
| "step": 40675 | |
| }, | |
| { | |
| "epoch": 20.5, | |
| "grad_norm": 2.6972315311431885, | |
| "learning_rate": 5.96140703517588e-06, | |
| "loss": 0.0016, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 20.52, | |
| "grad_norm": 0.6832017302513123, | |
| "learning_rate": 5.958894472361809e-06, | |
| "loss": 0.0016, | |
| "step": 40725 | |
| }, | |
| { | |
| "epoch": 20.53, | |
| "grad_norm": 0.46338987350463867, | |
| "learning_rate": 5.9563819095477395e-06, | |
| "loss": 0.0012, | |
| "step": 40750 | |
| }, | |
| { | |
| "epoch": 20.54, | |
| "grad_norm": 0.3584813177585602, | |
| "learning_rate": 5.953869346733668e-06, | |
| "loss": 0.0015, | |
| "step": 40775 | |
| }, | |
| { | |
| "epoch": 20.55, | |
| "grad_norm": 1.5687421560287476, | |
| "learning_rate": 5.951356783919598e-06, | |
| "loss": 0.0017, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 20.57, | |
| "grad_norm": 0.5602162480354309, | |
| "learning_rate": 5.948844221105528e-06, | |
| "loss": 0.0012, | |
| "step": 40825 | |
| }, | |
| { | |
| "epoch": 20.58, | |
| "grad_norm": 0.16096442937850952, | |
| "learning_rate": 5.946331658291458e-06, | |
| "loss": 0.0012, | |
| "step": 40850 | |
| }, | |
| { | |
| "epoch": 20.59, | |
| "grad_norm": 0.5620841979980469, | |
| "learning_rate": 5.943819095477388e-06, | |
| "loss": 0.0014, | |
| "step": 40875 | |
| }, | |
| { | |
| "epoch": 20.6, | |
| "grad_norm": 0.5683684349060059, | |
| "learning_rate": 5.941306532663317e-06, | |
| "loss": 0.0017, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 20.62, | |
| "grad_norm": 1.4936867952346802, | |
| "learning_rate": 5.938793969849247e-06, | |
| "loss": 0.0015, | |
| "step": 40925 | |
| }, | |
| { | |
| "epoch": 20.63, | |
| "grad_norm": 0.45212438702583313, | |
| "learning_rate": 5.936281407035176e-06, | |
| "loss": 0.0013, | |
| "step": 40950 | |
| }, | |
| { | |
| "epoch": 20.64, | |
| "grad_norm": 1.8357038497924805, | |
| "learning_rate": 5.933768844221106e-06, | |
| "loss": 0.0011, | |
| "step": 40975 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "grad_norm": 0.1379358172416687, | |
| "learning_rate": 5.931256281407035e-06, | |
| "loss": 0.0015, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "eval_loss": 0.3424818515777588, | |
| "eval_runtime": 650.8582, | |
| "eval_samples_per_second": 2.165, | |
| "eval_steps_per_second": 2.165, | |
| "eval_wer": 23.36215842269111, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 20.67, | |
| "grad_norm": 0.342557817697525, | |
| "learning_rate": 5.9287437185929655e-06, | |
| "loss": 0.001, | |
| "step": 41025 | |
| }, | |
| { | |
| "epoch": 20.68, | |
| "grad_norm": 0.7056984901428223, | |
| "learning_rate": 5.926231155778896e-06, | |
| "loss": 0.0013, | |
| "step": 41050 | |
| }, | |
| { | |
| "epoch": 20.69, | |
| "grad_norm": 1.0098013877868652, | |
| "learning_rate": 5.923718592964824e-06, | |
| "loss": 0.0015, | |
| "step": 41075 | |
| }, | |
| { | |
| "epoch": 20.71, | |
| "grad_norm": 0.6967382431030273, | |
| "learning_rate": 5.921206030150754e-06, | |
| "loss": 0.0014, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 20.72, | |
| "grad_norm": 0.544989287853241, | |
| "learning_rate": 5.918693467336684e-06, | |
| "loss": 0.0016, | |
| "step": 41125 | |
| }, | |
| { | |
| "epoch": 20.73, | |
| "grad_norm": 1.2400965690612793, | |
| "learning_rate": 5.916180904522614e-06, | |
| "loss": 0.0015, | |
| "step": 41150 | |
| }, | |
| { | |
| "epoch": 20.74, | |
| "grad_norm": 0.926023006439209, | |
| "learning_rate": 5.913668341708543e-06, | |
| "loss": 0.0016, | |
| "step": 41175 | |
| }, | |
| { | |
| "epoch": 20.76, | |
| "grad_norm": 1.1986762285232544, | |
| "learning_rate": 5.911155778894473e-06, | |
| "loss": 0.0016, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 20.77, | |
| "grad_norm": 0.6431388854980469, | |
| "learning_rate": 5.908643216080403e-06, | |
| "loss": 0.0014, | |
| "step": 41225 | |
| }, | |
| { | |
| "epoch": 20.78, | |
| "grad_norm": 1.1368434429168701, | |
| "learning_rate": 5.906130653266332e-06, | |
| "loss": 0.0013, | |
| "step": 41250 | |
| }, | |
| { | |
| "epoch": 20.79, | |
| "grad_norm": 0.7638266086578369, | |
| "learning_rate": 5.903618090452262e-06, | |
| "loss": 0.0014, | |
| "step": 41275 | |
| }, | |
| { | |
| "epoch": 20.81, | |
| "grad_norm": 1.2455073595046997, | |
| "learning_rate": 5.9011055276381914e-06, | |
| "loss": 0.0013, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "grad_norm": 0.9213681817054749, | |
| "learning_rate": 5.898592964824122e-06, | |
| "loss": 0.0015, | |
| "step": 41325 | |
| }, | |
| { | |
| "epoch": 20.83, | |
| "grad_norm": 0.5151415467262268, | |
| "learning_rate": 5.89608040201005e-06, | |
| "loss": 0.0016, | |
| "step": 41350 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "grad_norm": 0.7288360595703125, | |
| "learning_rate": 5.89356783919598e-06, | |
| "loss": 0.0017, | |
| "step": 41375 | |
| }, | |
| { | |
| "epoch": 20.86, | |
| "grad_norm": 0.4819887578487396, | |
| "learning_rate": 5.8910552763819096e-06, | |
| "loss": 0.0013, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 20.87, | |
| "grad_norm": 1.1284375190734863, | |
| "learning_rate": 5.88854271356784e-06, | |
| "loss": 0.0015, | |
| "step": 41425 | |
| }, | |
| { | |
| "epoch": 20.88, | |
| "grad_norm": 0.31427863240242004, | |
| "learning_rate": 5.88603015075377e-06, | |
| "loss": 0.0014, | |
| "step": 41450 | |
| }, | |
| { | |
| "epoch": 20.89, | |
| "grad_norm": 0.9035623669624329, | |
| "learning_rate": 5.883517587939699e-06, | |
| "loss": 0.0013, | |
| "step": 41475 | |
| }, | |
| { | |
| "epoch": 20.91, | |
| "grad_norm": 1.357260823249817, | |
| "learning_rate": 5.881005025125629e-06, | |
| "loss": 0.0011, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "grad_norm": 0.8989688158035278, | |
| "learning_rate": 5.878492462311558e-06, | |
| "loss": 0.0016, | |
| "step": 41525 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "grad_norm": 1.1103880405426025, | |
| "learning_rate": 5.875979899497488e-06, | |
| "loss": 0.0016, | |
| "step": 41550 | |
| }, | |
| { | |
| "epoch": 20.94, | |
| "grad_norm": 0.8313987851142883, | |
| "learning_rate": 5.873467336683417e-06, | |
| "loss": 0.0012, | |
| "step": 41575 | |
| }, | |
| { | |
| "epoch": 20.96, | |
| "grad_norm": 1.2921781539916992, | |
| "learning_rate": 5.8709547738693475e-06, | |
| "loss": 0.0013, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "grad_norm": 0.9626322984695435, | |
| "learning_rate": 5.868442211055276e-06, | |
| "loss": 0.0014, | |
| "step": 41625 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "grad_norm": 1.2628862857818604, | |
| "learning_rate": 5.865929648241206e-06, | |
| "loss": 0.0012, | |
| "step": 41650 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "grad_norm": 0.6756789088249207, | |
| "learning_rate": 5.863417085427136e-06, | |
| "loss": 0.0011, | |
| "step": 41675 | |
| }, | |
| { | |
| "epoch": 21.01, | |
| "grad_norm": 0.42541012167930603, | |
| "learning_rate": 5.860904522613066e-06, | |
| "loss": 0.0012, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 21.02, | |
| "grad_norm": 1.8883837461471558, | |
| "learning_rate": 5.858391959798996e-06, | |
| "loss": 0.0012, | |
| "step": 41725 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "grad_norm": 0.9031746983528137, | |
| "learning_rate": 5.855879396984925e-06, | |
| "loss": 0.001, | |
| "step": 41750 | |
| }, | |
| { | |
| "epoch": 21.05, | |
| "grad_norm": 0.30492404103279114, | |
| "learning_rate": 5.853366834170855e-06, | |
| "loss": 0.001, | |
| "step": 41775 | |
| }, | |
| { | |
| "epoch": 21.06, | |
| "grad_norm": 0.8610237836837769, | |
| "learning_rate": 5.850854271356784e-06, | |
| "loss": 0.0008, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 21.07, | |
| "grad_norm": 0.36193329095840454, | |
| "learning_rate": 5.848341708542714e-06, | |
| "loss": 0.001, | |
| "step": 41825 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "grad_norm": 0.7772315740585327, | |
| "learning_rate": 5.845829145728644e-06, | |
| "loss": 0.0012, | |
| "step": 41850 | |
| }, | |
| { | |
| "epoch": 21.1, | |
| "grad_norm": 0.4699445068836212, | |
| "learning_rate": 5.8433165829145735e-06, | |
| "loss": 0.0008, | |
| "step": 41875 | |
| }, | |
| { | |
| "epoch": 21.11, | |
| "grad_norm": 1.3260185718536377, | |
| "learning_rate": 5.840804020100504e-06, | |
| "loss": 0.0009, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "grad_norm": 0.33898288011550903, | |
| "learning_rate": 5.838291457286432e-06, | |
| "loss": 0.0008, | |
| "step": 41925 | |
| }, | |
| { | |
| "epoch": 21.13, | |
| "grad_norm": 1.4685719013214111, | |
| "learning_rate": 5.835778894472362e-06, | |
| "loss": 0.0011, | |
| "step": 41950 | |
| }, | |
| { | |
| "epoch": 21.15, | |
| "grad_norm": 2.672056198120117, | |
| "learning_rate": 5.833266331658292e-06, | |
| "loss": 0.0011, | |
| "step": 41975 | |
| }, | |
| { | |
| "epoch": 21.16, | |
| "grad_norm": 1.9097732305526733, | |
| "learning_rate": 5.830753768844222e-06, | |
| "loss": 0.0012, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 21.16, | |
| "eval_loss": 0.33881473541259766, | |
| "eval_runtime": 651.7527, | |
| "eval_samples_per_second": 2.162, | |
| "eval_steps_per_second": 2.162, | |
| "eval_wer": 22.815634728467657, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 21.17, | |
| "grad_norm": 1.823331356048584, | |
| "learning_rate": 5.828241206030151e-06, | |
| "loss": 0.0012, | |
| "step": 42025 | |
| }, | |
| { | |
| "epoch": 21.18, | |
| "grad_norm": 0.6585187911987305, | |
| "learning_rate": 5.825728643216081e-06, | |
| "loss": 0.0007, | |
| "step": 42050 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "grad_norm": 0.8023566603660583, | |
| "learning_rate": 5.823216080402011e-06, | |
| "loss": 0.0014, | |
| "step": 42075 | |
| }, | |
| { | |
| "epoch": 21.21, | |
| "grad_norm": 0.2983376979827881, | |
| "learning_rate": 5.82070351758794e-06, | |
| "loss": 0.0011, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 21.22, | |
| "grad_norm": 0.860016942024231, | |
| "learning_rate": 5.81819095477387e-06, | |
| "loss": 0.0014, | |
| "step": 42125 | |
| }, | |
| { | |
| "epoch": 21.23, | |
| "grad_norm": 0.5695117115974426, | |
| "learning_rate": 5.8156783919597994e-06, | |
| "loss": 0.0011, | |
| "step": 42150 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "grad_norm": 1.583242416381836, | |
| "learning_rate": 5.81316582914573e-06, | |
| "loss": 0.0009, | |
| "step": 42175 | |
| }, | |
| { | |
| "epoch": 21.26, | |
| "grad_norm": 0.7320623397827148, | |
| "learning_rate": 5.810653266331658e-06, | |
| "loss": 0.0011, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 21.27, | |
| "grad_norm": 0.40352964401245117, | |
| "learning_rate": 5.808140703517588e-06, | |
| "loss": 0.0011, | |
| "step": 42225 | |
| }, | |
| { | |
| "epoch": 21.28, | |
| "grad_norm": 1.1269155740737915, | |
| "learning_rate": 5.8056281407035176e-06, | |
| "loss": 0.0012, | |
| "step": 42250 | |
| }, | |
| { | |
| "epoch": 21.3, | |
| "grad_norm": 0.5032210350036621, | |
| "learning_rate": 5.803216080402011e-06, | |
| "loss": 0.0011, | |
| "step": 42275 | |
| }, | |
| { | |
| "epoch": 21.31, | |
| "grad_norm": 0.9120995998382568, | |
| "learning_rate": 5.80070351758794e-06, | |
| "loss": 0.0009, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 21.32, | |
| "grad_norm": 1.0807931423187256, | |
| "learning_rate": 5.79819095477387e-06, | |
| "loss": 0.001, | |
| "step": 42325 | |
| }, | |
| { | |
| "epoch": 21.34, | |
| "grad_norm": 0.5654991269111633, | |
| "learning_rate": 5.7956783919598e-06, | |
| "loss": 0.0012, | |
| "step": 42350 | |
| }, | |
| { | |
| "epoch": 21.35, | |
| "grad_norm": 0.4672817885875702, | |
| "learning_rate": 5.793165829145729e-06, | |
| "loss": 0.0013, | |
| "step": 42375 | |
| }, | |
| { | |
| "epoch": 21.36, | |
| "grad_norm": 0.4968101978302002, | |
| "learning_rate": 5.790653266331658e-06, | |
| "loss": 0.0011, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 21.37, | |
| "grad_norm": 1.5632412433624268, | |
| "learning_rate": 5.7881407035175884e-06, | |
| "loss": 0.0009, | |
| "step": 42425 | |
| }, | |
| { | |
| "epoch": 21.39, | |
| "grad_norm": 1.279451847076416, | |
| "learning_rate": 5.785628140703518e-06, | |
| "loss": 0.0012, | |
| "step": 42450 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "grad_norm": 0.2353668063879013, | |
| "learning_rate": 5.783115577889448e-06, | |
| "loss": 0.0012, | |
| "step": 42475 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "grad_norm": 1.097109079360962, | |
| "learning_rate": 5.780603015075378e-06, | |
| "loss": 0.0013, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "grad_norm": 0.5137972235679626, | |
| "learning_rate": 5.778090452261307e-06, | |
| "loss": 0.0014, | |
| "step": 42525 | |
| }, | |
| { | |
| "epoch": 21.44, | |
| "grad_norm": 0.7982520461082458, | |
| "learning_rate": 5.775577889447237e-06, | |
| "loss": 0.001, | |
| "step": 42550 | |
| }, | |
| { | |
| "epoch": 21.45, | |
| "grad_norm": 4.075470924377441, | |
| "learning_rate": 5.773065326633166e-06, | |
| "loss": 0.0013, | |
| "step": 42575 | |
| }, | |
| { | |
| "epoch": 21.46, | |
| "grad_norm": 0.4397733211517334, | |
| "learning_rate": 5.770552763819096e-06, | |
| "loss": 0.0012, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 21.47, | |
| "grad_norm": 0.650394082069397, | |
| "learning_rate": 5.7680402010050256e-06, | |
| "loss": 0.0011, | |
| "step": 42625 | |
| }, | |
| { | |
| "epoch": 21.49, | |
| "grad_norm": 1.209378957748413, | |
| "learning_rate": 5.765527638190955e-06, | |
| "loss": 0.0011, | |
| "step": 42650 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "grad_norm": 0.47554540634155273, | |
| "learning_rate": 5.763015075376885e-06, | |
| "loss": 0.0012, | |
| "step": 42675 | |
| }, | |
| { | |
| "epoch": 21.51, | |
| "grad_norm": 1.1790157556533813, | |
| "learning_rate": 5.760502512562814e-06, | |
| "loss": 0.0012, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 21.52, | |
| "grad_norm": 1.1118874549865723, | |
| "learning_rate": 5.7579899497487446e-06, | |
| "loss": 0.0012, | |
| "step": 42725 | |
| }, | |
| { | |
| "epoch": 21.54, | |
| "grad_norm": 0.956529974937439, | |
| "learning_rate": 5.755477386934674e-06, | |
| "loss": 0.0012, | |
| "step": 42750 | |
| }, | |
| { | |
| "epoch": 21.55, | |
| "grad_norm": 0.8777883052825928, | |
| "learning_rate": 5.752964824120604e-06, | |
| "loss": 0.0014, | |
| "step": 42775 | |
| }, | |
| { | |
| "epoch": 21.56, | |
| "grad_norm": 1.8465042114257812, | |
| "learning_rate": 5.7504522613065325e-06, | |
| "loss": 0.0013, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 21.57, | |
| "grad_norm": 1.209672212600708, | |
| "learning_rate": 5.747939698492463e-06, | |
| "loss": 0.0014, | |
| "step": 42825 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "grad_norm": 1.0998687744140625, | |
| "learning_rate": 5.745427135678392e-06, | |
| "loss": 0.0015, | |
| "step": 42850 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "grad_norm": 1.5435659885406494, | |
| "learning_rate": 5.742914572864322e-06, | |
| "loss": 0.0016, | |
| "step": 42875 | |
| }, | |
| { | |
| "epoch": 21.61, | |
| "grad_norm": 1.4470527172088623, | |
| "learning_rate": 5.740402010050252e-06, | |
| "loss": 0.0013, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 21.62, | |
| "grad_norm": 0.8701749444007874, | |
| "learning_rate": 5.737889447236181e-06, | |
| "loss": 0.0013, | |
| "step": 42925 | |
| }, | |
| { | |
| "epoch": 21.64, | |
| "grad_norm": 1.1655815839767456, | |
| "learning_rate": 5.735376884422111e-06, | |
| "loss": 0.001, | |
| "step": 42950 | |
| }, | |
| { | |
| "epoch": 21.65, | |
| "grad_norm": 0.8317478895187378, | |
| "learning_rate": 5.73286432160804e-06, | |
| "loss": 0.0012, | |
| "step": 42975 | |
| }, | |
| { | |
| "epoch": 21.66, | |
| "grad_norm": 0.2232298105955124, | |
| "learning_rate": 5.7303517587939705e-06, | |
| "loss": 0.0009, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 21.66, | |
| "eval_loss": 0.34524887800216675, | |
| "eval_runtime": 653.6286, | |
| "eval_samples_per_second": 2.156, | |
| "eval_steps_per_second": 2.156, | |
| "eval_wer": 23.13386371497752, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "grad_norm": 1.433415412902832, | |
| "learning_rate": 5.7278391959799e-06, | |
| "loss": 0.0012, | |
| "step": 43025 | |
| }, | |
| { | |
| "epoch": 21.69, | |
| "grad_norm": 1.2699315547943115, | |
| "learning_rate": 5.72532663316583e-06, | |
| "loss": 0.0013, | |
| "step": 43050 | |
| }, | |
| { | |
| "epoch": 21.7, | |
| "grad_norm": 1.10042405128479, | |
| "learning_rate": 5.7228140703517585e-06, | |
| "loss": 0.0015, | |
| "step": 43075 | |
| }, | |
| { | |
| "epoch": 21.71, | |
| "grad_norm": 1.3270542621612549, | |
| "learning_rate": 5.720301507537689e-06, | |
| "loss": 0.0012, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 21.73, | |
| "grad_norm": 1.0051465034484863, | |
| "learning_rate": 5.717788944723619e-06, | |
| "loss": 0.0013, | |
| "step": 43125 | |
| }, | |
| { | |
| "epoch": 21.74, | |
| "grad_norm": 2.064424514770508, | |
| "learning_rate": 5.715276381909548e-06, | |
| "loss": 0.0013, | |
| "step": 43150 | |
| }, | |
| { | |
| "epoch": 21.75, | |
| "grad_norm": 1.45639967918396, | |
| "learning_rate": 5.712763819095478e-06, | |
| "loss": 0.0012, | |
| "step": 43175 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "grad_norm": 0.627719521522522, | |
| "learning_rate": 5.710251256281407e-06, | |
| "loss": 0.0014, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 21.78, | |
| "grad_norm": 0.1227678582072258, | |
| "learning_rate": 5.707738693467337e-06, | |
| "loss": 0.001, | |
| "step": 43225 | |
| }, | |
| { | |
| "epoch": 21.79, | |
| "grad_norm": 0.5857130885124207, | |
| "learning_rate": 5.705226130653266e-06, | |
| "loss": 0.0011, | |
| "step": 43250 | |
| }, | |
| { | |
| "epoch": 21.8, | |
| "grad_norm": 0.859379231929779, | |
| "learning_rate": 5.7027135678391964e-06, | |
| "loss": 0.0009, | |
| "step": 43275 | |
| }, | |
| { | |
| "epoch": 21.81, | |
| "grad_norm": 0.30906084179878235, | |
| "learning_rate": 5.700201005025127e-06, | |
| "loss": 0.0012, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 21.83, | |
| "grad_norm": 0.7923578023910522, | |
| "learning_rate": 5.697688442211056e-06, | |
| "loss": 0.0013, | |
| "step": 43325 | |
| }, | |
| { | |
| "epoch": 21.84, | |
| "grad_norm": 0.5775353908538818, | |
| "learning_rate": 5.695175879396986e-06, | |
| "loss": 0.0012, | |
| "step": 43350 | |
| }, | |
| { | |
| "epoch": 21.85, | |
| "grad_norm": 3.3586642742156982, | |
| "learning_rate": 5.692663316582915e-06, | |
| "loss": 0.0012, | |
| "step": 43375 | |
| }, | |
| { | |
| "epoch": 21.86, | |
| "grad_norm": 1.088348388671875, | |
| "learning_rate": 5.690150753768845e-06, | |
| "loss": 0.0014, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 21.88, | |
| "grad_norm": 1.9004027843475342, | |
| "learning_rate": 5.687638190954774e-06, | |
| "loss": 0.0015, | |
| "step": 43425 | |
| }, | |
| { | |
| "epoch": 21.89, | |
| "grad_norm": 1.9646518230438232, | |
| "learning_rate": 5.685125628140704e-06, | |
| "loss": 0.0017, | |
| "step": 43450 | |
| }, | |
| { | |
| "epoch": 21.9, | |
| "grad_norm": 2.539280891418457, | |
| "learning_rate": 5.6826130653266336e-06, | |
| "loss": 0.0013, | |
| "step": 43475 | |
| }, | |
| { | |
| "epoch": 21.91, | |
| "grad_norm": 0.3101285398006439, | |
| "learning_rate": 5.680100502512563e-06, | |
| "loss": 0.0012, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 21.93, | |
| "grad_norm": 2.127978563308716, | |
| "learning_rate": 5.677587939698493e-06, | |
| "loss": 0.0011, | |
| "step": 43525 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "grad_norm": 0.44460466504096985, | |
| "learning_rate": 5.675075376884422e-06, | |
| "loss": 0.0016, | |
| "step": 43550 | |
| }, | |
| { | |
| "epoch": 21.95, | |
| "grad_norm": 1.158146858215332, | |
| "learning_rate": 5.6725628140703526e-06, | |
| "loss": 0.0015, | |
| "step": 43575 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "grad_norm": 1.6001086235046387, | |
| "learning_rate": 5.670050251256282e-06, | |
| "loss": 0.0015, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 21.98, | |
| "grad_norm": 0.6981241703033447, | |
| "learning_rate": 5.667537688442212e-06, | |
| "loss": 0.0013, | |
| "step": 43625 | |
| }, | |
| { | |
| "epoch": 21.99, | |
| "grad_norm": 0.45709845423698425, | |
| "learning_rate": 5.6650251256281405e-06, | |
| "loss": 0.0015, | |
| "step": 43650 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 1.2674909830093384, | |
| "learning_rate": 5.662512562814071e-06, | |
| "loss": 0.0015, | |
| "step": 43675 | |
| }, | |
| { | |
| "epoch": 22.02, | |
| "grad_norm": 0.591598629951477, | |
| "learning_rate": 5.66e-06, | |
| "loss": 0.0008, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 22.03, | |
| "grad_norm": 0.76810222864151, | |
| "learning_rate": 5.65748743718593e-06, | |
| "loss": 0.0009, | |
| "step": 43725 | |
| }, | |
| { | |
| "epoch": 22.04, | |
| "grad_norm": 1.0374010801315308, | |
| "learning_rate": 5.65497487437186e-06, | |
| "loss": 0.0008, | |
| "step": 43750 | |
| }, | |
| { | |
| "epoch": 22.05, | |
| "grad_norm": 1.3293951749801636, | |
| "learning_rate": 5.652462311557789e-06, | |
| "loss": 0.001, | |
| "step": 43775 | |
| }, | |
| { | |
| "epoch": 22.07, | |
| "grad_norm": 1.8345214128494263, | |
| "learning_rate": 5.649949748743719e-06, | |
| "loss": 0.0007, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 22.08, | |
| "grad_norm": 0.9170948266983032, | |
| "learning_rate": 5.647437185929648e-06, | |
| "loss": 0.0009, | |
| "step": 43825 | |
| }, | |
| { | |
| "epoch": 22.09, | |
| "grad_norm": 0.6133562326431274, | |
| "learning_rate": 5.6449246231155785e-06, | |
| "loss": 0.0009, | |
| "step": 43850 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "grad_norm": 0.9368806481361389, | |
| "learning_rate": 5.642412060301508e-06, | |
| "loss": 0.0011, | |
| "step": 43875 | |
| }, | |
| { | |
| "epoch": 22.12, | |
| "grad_norm": 0.38559451699256897, | |
| "learning_rate": 5.639899497487438e-06, | |
| "loss": 0.0012, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 22.13, | |
| "grad_norm": 0.1805734932422638, | |
| "learning_rate": 5.637386934673368e-06, | |
| "loss": 0.0008, | |
| "step": 43925 | |
| }, | |
| { | |
| "epoch": 22.14, | |
| "grad_norm": 3.672781467437744, | |
| "learning_rate": 5.634874371859297e-06, | |
| "loss": 0.0007, | |
| "step": 43950 | |
| }, | |
| { | |
| "epoch": 22.15, | |
| "grad_norm": 0.09901037812232971, | |
| "learning_rate": 5.632361809045227e-06, | |
| "loss": 0.0007, | |
| "step": 43975 | |
| }, | |
| { | |
| "epoch": 22.17, | |
| "grad_norm": 0.24132972955703735, | |
| "learning_rate": 5.629849246231156e-06, | |
| "loss": 0.0007, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 22.17, | |
| "eval_loss": 0.3424950838088989, | |
| "eval_runtime": 653.4662, | |
| "eval_samples_per_second": 2.156, | |
| "eval_steps_per_second": 2.156, | |
| "eval_wer": 22.725700449671393, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 22.18, | |
| "grad_norm": 1.1592354774475098, | |
| "learning_rate": 5.627336683417086e-06, | |
| "loss": 0.001, | |
| "step": 44025 | |
| }, | |
| { | |
| "epoch": 22.19, | |
| "grad_norm": 0.19992341101169586, | |
| "learning_rate": 5.624824120603015e-06, | |
| "loss": 0.0009, | |
| "step": 44050 | |
| }, | |
| { | |
| "epoch": 22.2, | |
| "grad_norm": 0.8353447914123535, | |
| "learning_rate": 5.622311557788945e-06, | |
| "loss": 0.0009, | |
| "step": 44075 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "grad_norm": 0.8957573771476746, | |
| "learning_rate": 5.619798994974874e-06, | |
| "loss": 0.0007, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 22.23, | |
| "grad_norm": 0.22726494073867798, | |
| "learning_rate": 5.6172864321608044e-06, | |
| "loss": 0.0008, | |
| "step": 44125 | |
| }, | |
| { | |
| "epoch": 22.24, | |
| "grad_norm": 0.44598016142845154, | |
| "learning_rate": 5.614773869346735e-06, | |
| "loss": 0.0011, | |
| "step": 44150 | |
| }, | |
| { | |
| "epoch": 22.25, | |
| "grad_norm": 1.2066985368728638, | |
| "learning_rate": 5.612261306532664e-06, | |
| "loss": 0.0007, | |
| "step": 44175 | |
| }, | |
| { | |
| "epoch": 22.27, | |
| "grad_norm": 0.41478270292282104, | |
| "learning_rate": 5.609748743718594e-06, | |
| "loss": 0.0008, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 22.28, | |
| "grad_norm": 0.7415564656257629, | |
| "learning_rate": 5.607236180904523e-06, | |
| "loss": 0.0007, | |
| "step": 44225 | |
| }, | |
| { | |
| "epoch": 22.29, | |
| "grad_norm": 0.26239436864852905, | |
| "learning_rate": 5.604723618090453e-06, | |
| "loss": 0.0007, | |
| "step": 44250 | |
| }, | |
| { | |
| "epoch": 22.3, | |
| "grad_norm": 0.6609335541725159, | |
| "learning_rate": 5.602211055276382e-06, | |
| "loss": 0.0007, | |
| "step": 44275 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "grad_norm": 0.35660025477409363, | |
| "learning_rate": 5.599698492462312e-06, | |
| "loss": 0.0008, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 22.33, | |
| "grad_norm": 0.5038982629776001, | |
| "learning_rate": 5.597185929648241e-06, | |
| "loss": 0.0008, | |
| "step": 44325 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "grad_norm": 1.157596468925476, | |
| "learning_rate": 5.594673366834171e-06, | |
| "loss": 0.0009, | |
| "step": 44350 | |
| }, | |
| { | |
| "epoch": 22.36, | |
| "grad_norm": 1.0030407905578613, | |
| "learning_rate": 5.592160804020101e-06, | |
| "loss": 0.001, | |
| "step": 44375 | |
| }, | |
| { | |
| "epoch": 22.37, | |
| "grad_norm": 0.47111421823501587, | |
| "learning_rate": 5.58964824120603e-06, | |
| "loss": 0.0008, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 22.38, | |
| "grad_norm": 0.3429202139377594, | |
| "learning_rate": 5.5871356783919606e-06, | |
| "loss": 0.0014, | |
| "step": 44425 | |
| }, | |
| { | |
| "epoch": 22.39, | |
| "grad_norm": 1.4424147605895996, | |
| "learning_rate": 5.58462311557789e-06, | |
| "loss": 0.0011, | |
| "step": 44450 | |
| }, | |
| { | |
| "epoch": 22.41, | |
| "grad_norm": 0.48875829577445984, | |
| "learning_rate": 5.58211055276382e-06, | |
| "loss": 0.0008, | |
| "step": 44475 | |
| }, | |
| { | |
| "epoch": 22.42, | |
| "grad_norm": 0.23549383878707886, | |
| "learning_rate": 5.5795979899497485e-06, | |
| "loss": 0.0007, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "grad_norm": 1.24130117893219, | |
| "learning_rate": 5.577085427135679e-06, | |
| "loss": 0.0014, | |
| "step": 44525 | |
| }, | |
| { | |
| "epoch": 22.44, | |
| "grad_norm": 0.129581019282341, | |
| "learning_rate": 5.574572864321609e-06, | |
| "loss": 0.001, | |
| "step": 44550 | |
| }, | |
| { | |
| "epoch": 22.46, | |
| "grad_norm": 1.358135461807251, | |
| "learning_rate": 5.572060301507538e-06, | |
| "loss": 0.0012, | |
| "step": 44575 | |
| }, | |
| { | |
| "epoch": 22.47, | |
| "grad_norm": 0.5511311292648315, | |
| "learning_rate": 5.569547738693468e-06, | |
| "loss": 0.0011, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 22.48, | |
| "grad_norm": 0.2945619225502014, | |
| "learning_rate": 5.567035175879397e-06, | |
| "loss": 0.0009, | |
| "step": 44625 | |
| }, | |
| { | |
| "epoch": 22.49, | |
| "grad_norm": 1.2442690134048462, | |
| "learning_rate": 5.564522613065327e-06, | |
| "loss": 0.0011, | |
| "step": 44650 | |
| }, | |
| { | |
| "epoch": 22.51, | |
| "grad_norm": 1.0728257894515991, | |
| "learning_rate": 5.562010050251256e-06, | |
| "loss": 0.0009, | |
| "step": 44675 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "grad_norm": 0.51957768201828, | |
| "learning_rate": 5.5594974874371865e-06, | |
| "loss": 0.0012, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 22.53, | |
| "grad_norm": 1.0889358520507812, | |
| "learning_rate": 5.556984924623116e-06, | |
| "loss": 0.0008, | |
| "step": 44725 | |
| }, | |
| { | |
| "epoch": 22.54, | |
| "grad_norm": 1.2384743690490723, | |
| "learning_rate": 5.554472361809046e-06, | |
| "loss": 0.0009, | |
| "step": 44750 | |
| }, | |
| { | |
| "epoch": 22.56, | |
| "grad_norm": 1.6399548053741455, | |
| "learning_rate": 5.551959798994976e-06, | |
| "loss": 0.0012, | |
| "step": 44775 | |
| }, | |
| { | |
| "epoch": 22.57, | |
| "grad_norm": 1.122065544128418, | |
| "learning_rate": 5.549447236180905e-06, | |
| "loss": 0.0013, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 22.58, | |
| "grad_norm": 1.5687880516052246, | |
| "learning_rate": 5.546934673366835e-06, | |
| "loss": 0.0011, | |
| "step": 44825 | |
| }, | |
| { | |
| "epoch": 22.59, | |
| "grad_norm": 0.26494044065475464, | |
| "learning_rate": 5.544422110552764e-06, | |
| "loss": 0.0013, | |
| "step": 44850 | |
| }, | |
| { | |
| "epoch": 22.61, | |
| "grad_norm": 2.1011672019958496, | |
| "learning_rate": 5.541909547738694e-06, | |
| "loss": 0.0012, | |
| "step": 44875 | |
| }, | |
| { | |
| "epoch": 22.62, | |
| "grad_norm": 0.8654801249504089, | |
| "learning_rate": 5.539396984924623e-06, | |
| "loss": 0.0011, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 22.63, | |
| "grad_norm": 0.3072070777416229, | |
| "learning_rate": 5.536884422110553e-06, | |
| "loss": 0.0009, | |
| "step": 44925 | |
| }, | |
| { | |
| "epoch": 22.64, | |
| "grad_norm": 0.3950670659542084, | |
| "learning_rate": 5.534371859296482e-06, | |
| "loss": 0.0009, | |
| "step": 44950 | |
| }, | |
| { | |
| "epoch": 22.66, | |
| "grad_norm": 0.8394802212715149, | |
| "learning_rate": 5.5318592964824124e-06, | |
| "loss": 0.0007, | |
| "step": 44975 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "grad_norm": 0.3768616020679474, | |
| "learning_rate": 5.529346733668343e-06, | |
| "loss": 0.001, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "eval_loss": 0.34873273968696594, | |
| "eval_runtime": 648.3792, | |
| "eval_samples_per_second": 2.173, | |
| "eval_steps_per_second": 2.173, | |
| "eval_wer": 22.79488066413006, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 22.68, | |
| "grad_norm": 0.8950992226600647, | |
| "learning_rate": 5.526834170854272e-06, | |
| "loss": 0.0012, | |
| "step": 45025 | |
| }, | |
| { | |
| "epoch": 22.7, | |
| "grad_norm": 0.6401100754737854, | |
| "learning_rate": 5.524321608040202e-06, | |
| "loss": 0.0013, | |
| "step": 45050 | |
| }, | |
| { | |
| "epoch": 22.71, | |
| "grad_norm": 1.1628910303115845, | |
| "learning_rate": 5.521809045226131e-06, | |
| "loss": 0.0012, | |
| "step": 45075 | |
| }, | |
| { | |
| "epoch": 22.72, | |
| "grad_norm": 1.316792607307434, | |
| "learning_rate": 5.519296482412061e-06, | |
| "loss": 0.001, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 22.73, | |
| "grad_norm": 1.3205770254135132, | |
| "learning_rate": 5.51678391959799e-06, | |
| "loss": 0.0013, | |
| "step": 45125 | |
| }, | |
| { | |
| "epoch": 22.75, | |
| "grad_norm": 0.2712342441082001, | |
| "learning_rate": 5.51427135678392e-06, | |
| "loss": 0.0011, | |
| "step": 45150 | |
| }, | |
| { | |
| "epoch": 22.76, | |
| "grad_norm": 0.47601330280303955, | |
| "learning_rate": 5.51175879396985e-06, | |
| "loss": 0.0011, | |
| "step": 45175 | |
| }, | |
| { | |
| "epoch": 22.77, | |
| "grad_norm": 0.9388231039047241, | |
| "learning_rate": 5.509246231155779e-06, | |
| "loss": 0.0012, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 22.78, | |
| "grad_norm": 1.183489203453064, | |
| "learning_rate": 5.506834170854271e-06, | |
| "loss": 0.0013, | |
| "step": 45225 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "grad_norm": 1.0215598344802856, | |
| "learning_rate": 5.5043216080402015e-06, | |
| "loss": 0.0013, | |
| "step": 45250 | |
| }, | |
| { | |
| "epoch": 22.81, | |
| "grad_norm": 0.5754547119140625, | |
| "learning_rate": 5.501809045226131e-06, | |
| "loss": 0.0012, | |
| "step": 45275 | |
| }, | |
| { | |
| "epoch": 22.82, | |
| "grad_norm": 1.5252500772476196, | |
| "learning_rate": 5.499296482412061e-06, | |
| "loss": 0.0014, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 22.83, | |
| "grad_norm": 0.5785127282142639, | |
| "learning_rate": 5.49678391959799e-06, | |
| "loss": 0.0011, | |
| "step": 45325 | |
| }, | |
| { | |
| "epoch": 22.85, | |
| "grad_norm": 1.1003527641296387, | |
| "learning_rate": 5.4942713567839204e-06, | |
| "loss": 0.001, | |
| "step": 45350 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "grad_norm": 1.1432653665542603, | |
| "learning_rate": 5.491758793969851e-06, | |
| "loss": 0.0012, | |
| "step": 45375 | |
| }, | |
| { | |
| "epoch": 22.87, | |
| "grad_norm": 0.7556006908416748, | |
| "learning_rate": 5.489246231155779e-06, | |
| "loss": 0.0011, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 22.88, | |
| "grad_norm": 0.6173690557479858, | |
| "learning_rate": 5.486733668341709e-06, | |
| "loss": 0.001, | |
| "step": 45425 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "grad_norm": 0.3593469262123108, | |
| "learning_rate": 5.484221105527639e-06, | |
| "loss": 0.0014, | |
| "step": 45450 | |
| }, | |
| { | |
| "epoch": 22.91, | |
| "grad_norm": 1.9169950485229492, | |
| "learning_rate": 5.481708542713569e-06, | |
| "loss": 0.0011, | |
| "step": 45475 | |
| }, | |
| { | |
| "epoch": 22.92, | |
| "grad_norm": 0.8808764815330505, | |
| "learning_rate": 5.479195979899497e-06, | |
| "loss": 0.0014, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 22.93, | |
| "grad_norm": 0.18010124564170837, | |
| "learning_rate": 5.476683417085427e-06, | |
| "loss": 0.0012, | |
| "step": 45525 | |
| }, | |
| { | |
| "epoch": 22.95, | |
| "grad_norm": 0.6205843091011047, | |
| "learning_rate": 5.474170854271357e-06, | |
| "loss": 0.001, | |
| "step": 45550 | |
| }, | |
| { | |
| "epoch": 22.96, | |
| "grad_norm": 1.8465744256973267, | |
| "learning_rate": 5.471658291457287e-06, | |
| "loss": 0.001, | |
| "step": 45575 | |
| }, | |
| { | |
| "epoch": 22.97, | |
| "grad_norm": 0.47944799065589905, | |
| "learning_rate": 5.469145728643217e-06, | |
| "loss": 0.0012, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 22.98, | |
| "grad_norm": 1.0778087377548218, | |
| "learning_rate": 5.466633165829146e-06, | |
| "loss": 0.0012, | |
| "step": 45625 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 1.2320517301559448, | |
| "learning_rate": 5.4641206030150766e-06, | |
| "loss": 0.001, | |
| "step": 45650 | |
| }, | |
| { | |
| "epoch": 23.01, | |
| "grad_norm": 0.2702305316925049, | |
| "learning_rate": 5.461608040201005e-06, | |
| "loss": 0.0009, | |
| "step": 45675 | |
| }, | |
| { | |
| "epoch": 23.02, | |
| "grad_norm": 0.4143355190753937, | |
| "learning_rate": 5.459095477386935e-06, | |
| "loss": 0.0007, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 23.04, | |
| "grad_norm": 1.7724355459213257, | |
| "learning_rate": 5.4565829145728645e-06, | |
| "loss": 0.0007, | |
| "step": 45725 | |
| }, | |
| { | |
| "epoch": 23.05, | |
| "grad_norm": 0.8267619609832764, | |
| "learning_rate": 5.454070351758795e-06, | |
| "loss": 0.0007, | |
| "step": 45750 | |
| }, | |
| { | |
| "epoch": 23.06, | |
| "grad_norm": 0.7586312890052795, | |
| "learning_rate": 5.451557788944723e-06, | |
| "loss": 0.0007, | |
| "step": 45775 | |
| }, | |
| { | |
| "epoch": 23.07, | |
| "grad_norm": 0.6827680468559265, | |
| "learning_rate": 5.449045226130653e-06, | |
| "loss": 0.001, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 23.09, | |
| "grad_norm": 1.9296995401382446, | |
| "learning_rate": 5.4465326633165835e-06, | |
| "loss": 0.0011, | |
| "step": 45825 | |
| }, | |
| { | |
| "epoch": 23.1, | |
| "grad_norm": 0.6591385006904602, | |
| "learning_rate": 5.444020100502513e-06, | |
| "loss": 0.001, | |
| "step": 45850 | |
| }, | |
| { | |
| "epoch": 23.11, | |
| "grad_norm": 0.3265586793422699, | |
| "learning_rate": 5.441507537688443e-06, | |
| "loss": 0.0009, | |
| "step": 45875 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "grad_norm": 1.2838494777679443, | |
| "learning_rate": 5.438994974874372e-06, | |
| "loss": 0.0008, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 23.14, | |
| "grad_norm": 2.554136037826538, | |
| "learning_rate": 5.4364824120603025e-06, | |
| "loss": 0.0012, | |
| "step": 45925 | |
| }, | |
| { | |
| "epoch": 23.15, | |
| "grad_norm": 1.0224181413650513, | |
| "learning_rate": 5.433969849246231e-06, | |
| "loss": 0.001, | |
| "step": 45950 | |
| }, | |
| { | |
| "epoch": 23.16, | |
| "grad_norm": 1.341222882270813, | |
| "learning_rate": 5.431457286432161e-06, | |
| "loss": 0.0009, | |
| "step": 45975 | |
| }, | |
| { | |
| "epoch": 23.17, | |
| "grad_norm": 0.49937868118286133, | |
| "learning_rate": 5.428944723618091e-06, | |
| "loss": 0.0007, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 23.17, | |
| "eval_loss": 0.346920371055603, | |
| "eval_runtime": 644.2541, | |
| "eval_samples_per_second": 2.187, | |
| "eval_steps_per_second": 2.187, | |
| "eval_wer": 22.656520235212728, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 23.19, | |
| "grad_norm": 0.7111514806747437, | |
| "learning_rate": 5.426432160804021e-06, | |
| "loss": 0.0008, | |
| "step": 46025 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "grad_norm": 0.8799687623977661, | |
| "learning_rate": 5.423919597989951e-06, | |
| "loss": 0.0008, | |
| "step": 46050 | |
| }, | |
| { | |
| "epoch": 23.21, | |
| "grad_norm": 0.3264644742012024, | |
| "learning_rate": 5.421407035175879e-06, | |
| "loss": 0.0012, | |
| "step": 46075 | |
| }, | |
| { | |
| "epoch": 23.22, | |
| "grad_norm": 1.2488361597061157, | |
| "learning_rate": 5.4188944723618095e-06, | |
| "loss": 0.0011, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 23.24, | |
| "grad_norm": 0.21208049356937408, | |
| "learning_rate": 5.416381909547739e-06, | |
| "loss": 0.001, | |
| "step": 46125 | |
| }, | |
| { | |
| "epoch": 23.25, | |
| "grad_norm": 0.593122661113739, | |
| "learning_rate": 5.413869346733669e-06, | |
| "loss": 0.0008, | |
| "step": 46150 | |
| }, | |
| { | |
| "epoch": 23.26, | |
| "grad_norm": 0.8040767312049866, | |
| "learning_rate": 5.411356783919598e-06, | |
| "loss": 0.0008, | |
| "step": 46175 | |
| }, | |
| { | |
| "epoch": 23.27, | |
| "grad_norm": 0.21935276687145233, | |
| "learning_rate": 5.4088442211055284e-06, | |
| "loss": 0.0009, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 23.29, | |
| "grad_norm": 0.16591764986515045, | |
| "learning_rate": 5.406331658291459e-06, | |
| "loss": 0.0007, | |
| "step": 46225 | |
| }, | |
| { | |
| "epoch": 23.3, | |
| "grad_norm": 0.7102475762367249, | |
| "learning_rate": 5.403819095477387e-06, | |
| "loss": 0.0011, | |
| "step": 46250 | |
| }, | |
| { | |
| "epoch": 23.31, | |
| "grad_norm": 2.245885133743286, | |
| "learning_rate": 5.401306532663317e-06, | |
| "loss": 0.0011, | |
| "step": 46275 | |
| }, | |
| { | |
| "epoch": 23.32, | |
| "grad_norm": 0.5632694363594055, | |
| "learning_rate": 5.398793969849247e-06, | |
| "loss": 0.0008, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 23.34, | |
| "grad_norm": 0.4642152190208435, | |
| "learning_rate": 5.396281407035177e-06, | |
| "loss": 0.0008, | |
| "step": 46325 | |
| }, | |
| { | |
| "epoch": 23.35, | |
| "grad_norm": 1.0977600812911987, | |
| "learning_rate": 5.393768844221105e-06, | |
| "loss": 0.0012, | |
| "step": 46350 | |
| }, | |
| { | |
| "epoch": 23.36, | |
| "grad_norm": 1.1424881219863892, | |
| "learning_rate": 5.391256281407035e-06, | |
| "loss": 0.0011, | |
| "step": 46375 | |
| }, | |
| { | |
| "epoch": 23.38, | |
| "grad_norm": 1.7661696672439575, | |
| "learning_rate": 5.388743718592965e-06, | |
| "loss": 0.0009, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 23.39, | |
| "grad_norm": 0.5764384269714355, | |
| "learning_rate": 5.386231155778895e-06, | |
| "loss": 0.0011, | |
| "step": 46425 | |
| }, | |
| { | |
| "epoch": 23.4, | |
| "grad_norm": 0.5465607047080994, | |
| "learning_rate": 5.383718592964825e-06, | |
| "loss": 0.0015, | |
| "step": 46450 | |
| }, | |
| { | |
| "epoch": 23.41, | |
| "grad_norm": 0.4862133264541626, | |
| "learning_rate": 5.381206030150754e-06, | |
| "loss": 0.0009, | |
| "step": 46475 | |
| }, | |
| { | |
| "epoch": 23.43, | |
| "grad_norm": 0.23647759854793549, | |
| "learning_rate": 5.3786934673366846e-06, | |
| "loss": 0.0007, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "grad_norm": 0.3536996841430664, | |
| "learning_rate": 5.376180904522613e-06, | |
| "loss": 0.0007, | |
| "step": 46525 | |
| }, | |
| { | |
| "epoch": 23.45, | |
| "grad_norm": 1.0129157304763794, | |
| "learning_rate": 5.373668341708543e-06, | |
| "loss": 0.0008, | |
| "step": 46550 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "grad_norm": 0.41435906291007996, | |
| "learning_rate": 5.3711557788944725e-06, | |
| "loss": 0.0011, | |
| "step": 46575 | |
| }, | |
| { | |
| "epoch": 23.48, | |
| "grad_norm": 0.33409273624420166, | |
| "learning_rate": 5.368643216080403e-06, | |
| "loss": 0.001, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 23.49, | |
| "grad_norm": 0.2821144759654999, | |
| "learning_rate": 5.366130653266333e-06, | |
| "loss": 0.001, | |
| "step": 46625 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "grad_norm": 1.0986402034759521, | |
| "learning_rate": 5.363618090452261e-06, | |
| "loss": 0.001, | |
| "step": 46650 | |
| }, | |
| { | |
| "epoch": 23.51, | |
| "grad_norm": 1.016719102859497, | |
| "learning_rate": 5.3611055276381915e-06, | |
| "loss": 0.001, | |
| "step": 46675 | |
| }, | |
| { | |
| "epoch": 23.53, | |
| "grad_norm": 0.6379337310791016, | |
| "learning_rate": 5.358592964824121e-06, | |
| "loss": 0.001, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 23.54, | |
| "grad_norm": 0.8883301019668579, | |
| "learning_rate": 5.356080402010051e-06, | |
| "loss": 0.0008, | |
| "step": 46725 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "grad_norm": 0.7816546559333801, | |
| "learning_rate": 5.35356783919598e-06, | |
| "loss": 0.0009, | |
| "step": 46750 | |
| }, | |
| { | |
| "epoch": 23.56, | |
| "grad_norm": 0.9444398283958435, | |
| "learning_rate": 5.3510552763819105e-06, | |
| "loss": 0.0012, | |
| "step": 46775 | |
| }, | |
| { | |
| "epoch": 23.58, | |
| "grad_norm": 0.7157164812088013, | |
| "learning_rate": 5.348542713567839e-06, | |
| "loss": 0.001, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 23.59, | |
| "grad_norm": 0.31857380270957947, | |
| "learning_rate": 5.346030150753769e-06, | |
| "loss": 0.0011, | |
| "step": 46825 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "grad_norm": 1.1218419075012207, | |
| "learning_rate": 5.343517587939699e-06, | |
| "loss": 0.0014, | |
| "step": 46850 | |
| }, | |
| { | |
| "epoch": 23.61, | |
| "grad_norm": 1.2285215854644775, | |
| "learning_rate": 5.341005025125629e-06, | |
| "loss": 0.001, | |
| "step": 46875 | |
| }, | |
| { | |
| "epoch": 23.63, | |
| "grad_norm": 1.1362957954406738, | |
| "learning_rate": 5.338492462311559e-06, | |
| "loss": 0.001, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 23.64, | |
| "grad_norm": 1.0595365762710571, | |
| "learning_rate": 5.335979899497487e-06, | |
| "loss": 0.001, | |
| "step": 46925 | |
| }, | |
| { | |
| "epoch": 23.65, | |
| "grad_norm": 0.9272093176841736, | |
| "learning_rate": 5.3334673366834175e-06, | |
| "loss": 0.0012, | |
| "step": 46950 | |
| }, | |
| { | |
| "epoch": 23.66, | |
| "grad_norm": 1.4149200916290283, | |
| "learning_rate": 5.330954773869347e-06, | |
| "loss": 0.001, | |
| "step": 46975 | |
| }, | |
| { | |
| "epoch": 23.68, | |
| "grad_norm": 0.7414202094078064, | |
| "learning_rate": 5.328442211055277e-06, | |
| "loss": 0.0015, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 23.68, | |
| "eval_loss": 0.3519901931285858, | |
| "eval_runtime": 650.2703, | |
| "eval_samples_per_second": 2.167, | |
| "eval_steps_per_second": 2.167, | |
| "eval_wer": 22.905569007263924, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 23.69, | |
| "grad_norm": 1.5265312194824219, | |
| "learning_rate": 5.325929648241206e-06, | |
| "loss": 0.0012, | |
| "step": 47025 | |
| }, | |
| { | |
| "epoch": 23.7, | |
| "grad_norm": 0.3424956500530243, | |
| "learning_rate": 5.3234170854271364e-06, | |
| "loss": 0.001, | |
| "step": 47050 | |
| }, | |
| { | |
| "epoch": 23.72, | |
| "grad_norm": 1.250054121017456, | |
| "learning_rate": 5.320904522613067e-06, | |
| "loss": 0.0012, | |
| "step": 47075 | |
| }, | |
| { | |
| "epoch": 23.73, | |
| "grad_norm": 0.7167928218841553, | |
| "learning_rate": 5.318391959798995e-06, | |
| "loss": 0.0011, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "grad_norm": 1.2113206386566162, | |
| "learning_rate": 5.315879396984925e-06, | |
| "loss": 0.001, | |
| "step": 47125 | |
| }, | |
| { | |
| "epoch": 23.75, | |
| "grad_norm": 0.683556079864502, | |
| "learning_rate": 5.313366834170855e-06, | |
| "loss": 0.0011, | |
| "step": 47150 | |
| }, | |
| { | |
| "epoch": 23.77, | |
| "grad_norm": 0.2526809871196747, | |
| "learning_rate": 5.310854271356785e-06, | |
| "loss": 0.001, | |
| "step": 47175 | |
| }, | |
| { | |
| "epoch": 23.78, | |
| "grad_norm": 1.4190630912780762, | |
| "learning_rate": 5.308341708542713e-06, | |
| "loss": 0.0012, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 23.79, | |
| "grad_norm": 1.7319457530975342, | |
| "learning_rate": 5.305829145728643e-06, | |
| "loss": 0.0014, | |
| "step": 47225 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "grad_norm": 0.9196786284446716, | |
| "learning_rate": 5.3033165829145736e-06, | |
| "loss": 0.0009, | |
| "step": 47250 | |
| }, | |
| { | |
| "epoch": 23.82, | |
| "grad_norm": 0.6173463463783264, | |
| "learning_rate": 5.300804020100503e-06, | |
| "loss": 0.001, | |
| "step": 47275 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "grad_norm": 0.6350324153900146, | |
| "learning_rate": 5.298291457286433e-06, | |
| "loss": 0.0012, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 23.84, | |
| "grad_norm": 0.24510569870471954, | |
| "learning_rate": 5.295778894472362e-06, | |
| "loss": 0.0007, | |
| "step": 47325 | |
| }, | |
| { | |
| "epoch": 23.85, | |
| "grad_norm": 0.6556370258331299, | |
| "learning_rate": 5.2932663316582926e-06, | |
| "loss": 0.0012, | |
| "step": 47350 | |
| }, | |
| { | |
| "epoch": 23.87, | |
| "grad_norm": 0.13942670822143555, | |
| "learning_rate": 5.290753768844221e-06, | |
| "loss": 0.001, | |
| "step": 47375 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "grad_norm": 0.5470798015594482, | |
| "learning_rate": 5.288241206030151e-06, | |
| "loss": 0.001, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 23.89, | |
| "grad_norm": 0.18008272349834442, | |
| "learning_rate": 5.2857286432160805e-06, | |
| "loss": 0.0009, | |
| "step": 47425 | |
| }, | |
| { | |
| "epoch": 23.9, | |
| "grad_norm": 0.6318380832672119, | |
| "learning_rate": 5.283216080402011e-06, | |
| "loss": 0.0011, | |
| "step": 47450 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "grad_norm": 0.3588716387748718, | |
| "learning_rate": 5.280703517587941e-06, | |
| "loss": 0.0013, | |
| "step": 47475 | |
| }, | |
| { | |
| "epoch": 23.93, | |
| "grad_norm": 0.29687148332595825, | |
| "learning_rate": 5.278190954773869e-06, | |
| "loss": 0.001, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "grad_norm": 0.864915132522583, | |
| "learning_rate": 5.2756783919597995e-06, | |
| "loss": 0.0012, | |
| "step": 47525 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "grad_norm": 1.526944637298584, | |
| "learning_rate": 5.273165829145729e-06, | |
| "loss": 0.0011, | |
| "step": 47550 | |
| }, | |
| { | |
| "epoch": 23.97, | |
| "grad_norm": 1.1919291019439697, | |
| "learning_rate": 5.270653266331659e-06, | |
| "loss": 0.0012, | |
| "step": 47575 | |
| }, | |
| { | |
| "epoch": 23.98, | |
| "grad_norm": 0.46352332830429077, | |
| "learning_rate": 5.268140703517588e-06, | |
| "loss": 0.001, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 23.99, | |
| "grad_norm": 1.840276837348938, | |
| "learning_rate": 5.2656281407035185e-06, | |
| "loss": 0.0011, | |
| "step": 47625 | |
| }, | |
| { | |
| "epoch": 24.01, | |
| "grad_norm": 0.4287075996398926, | |
| "learning_rate": 5.263115577889447e-06, | |
| "loss": 0.001, | |
| "step": 47650 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "grad_norm": 0.6221798062324524, | |
| "learning_rate": 5.260603015075377e-06, | |
| "loss": 0.0008, | |
| "step": 47675 | |
| }, | |
| { | |
| "epoch": 24.03, | |
| "grad_norm": 0.7368704676628113, | |
| "learning_rate": 5.258090452261307e-06, | |
| "loss": 0.0009, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 24.04, | |
| "grad_norm": 0.7908537983894348, | |
| "learning_rate": 5.255577889447237e-06, | |
| "loss": 0.0007, | |
| "step": 47725 | |
| }, | |
| { | |
| "epoch": 24.06, | |
| "grad_norm": 0.32871556282043457, | |
| "learning_rate": 5.253065326633167e-06, | |
| "loss": 0.0006, | |
| "step": 47750 | |
| }, | |
| { | |
| "epoch": 24.07, | |
| "grad_norm": 0.5463153719902039, | |
| "learning_rate": 5.250552763819095e-06, | |
| "loss": 0.0005, | |
| "step": 47775 | |
| }, | |
| { | |
| "epoch": 24.08, | |
| "grad_norm": 1.1261115074157715, | |
| "learning_rate": 5.2480402010050255e-06, | |
| "loss": 0.0008, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 24.09, | |
| "grad_norm": 0.19659703969955444, | |
| "learning_rate": 5.245527638190955e-06, | |
| "loss": 0.0007, | |
| "step": 47825 | |
| }, | |
| { | |
| "epoch": 24.11, | |
| "grad_norm": 1.187845230102539, | |
| "learning_rate": 5.243015075376885e-06, | |
| "loss": 0.0005, | |
| "step": 47850 | |
| }, | |
| { | |
| "epoch": 24.12, | |
| "grad_norm": 0.7721708416938782, | |
| "learning_rate": 5.240502512562814e-06, | |
| "loss": 0.0006, | |
| "step": 47875 | |
| }, | |
| { | |
| "epoch": 24.13, | |
| "grad_norm": 1.14128577709198, | |
| "learning_rate": 5.2379899497487444e-06, | |
| "loss": 0.0007, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 24.14, | |
| "grad_norm": 0.2921935021877289, | |
| "learning_rate": 5.235477386934675e-06, | |
| "loss": 0.0006, | |
| "step": 47925 | |
| }, | |
| { | |
| "epoch": 24.16, | |
| "grad_norm": 0.542747437953949, | |
| "learning_rate": 5.232964824120603e-06, | |
| "loss": 0.0007, | |
| "step": 47950 | |
| }, | |
| { | |
| "epoch": 24.17, | |
| "grad_norm": 0.1501588523387909, | |
| "learning_rate": 5.230452261306533e-06, | |
| "loss": 0.0006, | |
| "step": 47975 | |
| }, | |
| { | |
| "epoch": 24.18, | |
| "grad_norm": 1.1452776193618774, | |
| "learning_rate": 5.227939698492463e-06, | |
| "loss": 0.0006, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 24.18, | |
| "eval_loss": 0.3501649498939514, | |
| "eval_runtime": 653.8098, | |
| "eval_samples_per_second": 2.155, | |
| "eval_steps_per_second": 2.155, | |
| "eval_wer": 22.365963334486334, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 24.19, | |
| "grad_norm": 0.4397837817668915, | |
| "learning_rate": 5.225427135678393e-06, | |
| "loss": 0.0008, | |
| "step": 48025 | |
| }, | |
| { | |
| "epoch": 24.21, | |
| "grad_norm": 0.2055756002664566, | |
| "learning_rate": 5.222914572864321e-06, | |
| "loss": 0.0007, | |
| "step": 48050 | |
| }, | |
| { | |
| "epoch": 24.22, | |
| "grad_norm": 0.5878810882568359, | |
| "learning_rate": 5.220402010050251e-06, | |
| "loss": 0.0006, | |
| "step": 48075 | |
| }, | |
| { | |
| "epoch": 24.23, | |
| "grad_norm": 0.17719118297100067, | |
| "learning_rate": 5.217889447236182e-06, | |
| "loss": 0.0006, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 24.24, | |
| "grad_norm": 0.6639860272407532, | |
| "learning_rate": 5.215376884422111e-06, | |
| "loss": 0.0007, | |
| "step": 48125 | |
| }, | |
| { | |
| "epoch": 24.26, | |
| "grad_norm": 0.47352033853530884, | |
| "learning_rate": 5.212864321608041e-06, | |
| "loss": 0.0007, | |
| "step": 48150 | |
| }, | |
| { | |
| "epoch": 24.27, | |
| "grad_norm": 0.5616294145584106, | |
| "learning_rate": 5.21035175879397e-06, | |
| "loss": 0.0007, | |
| "step": 48175 | |
| }, | |
| { | |
| "epoch": 24.28, | |
| "grad_norm": 0.23458968102931976, | |
| "learning_rate": 5.2078391959799006e-06, | |
| "loss": 0.0005, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 24.29, | |
| "grad_norm": 1.2819747924804688, | |
| "learning_rate": 5.205326633165829e-06, | |
| "loss": 0.0009, | |
| "step": 48225 | |
| }, | |
| { | |
| "epoch": 24.31, | |
| "grad_norm": 0.9145079255104065, | |
| "learning_rate": 5.202814070351759e-06, | |
| "loss": 0.001, | |
| "step": 48250 | |
| }, | |
| { | |
| "epoch": 24.32, | |
| "grad_norm": 0.3194675147533417, | |
| "learning_rate": 5.2003015075376885e-06, | |
| "loss": 0.0007, | |
| "step": 48275 | |
| }, | |
| { | |
| "epoch": 24.33, | |
| "grad_norm": 0.7687875032424927, | |
| "learning_rate": 5.197788944723619e-06, | |
| "loss": 0.0007, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 24.35, | |
| "grad_norm": 0.46240946650505066, | |
| "learning_rate": 5.195276381909549e-06, | |
| "loss": 0.0007, | |
| "step": 48325 | |
| }, | |
| { | |
| "epoch": 24.36, | |
| "grad_norm": 0.9186506271362305, | |
| "learning_rate": 5.192763819095477e-06, | |
| "loss": 0.0007, | |
| "step": 48350 | |
| }, | |
| { | |
| "epoch": 24.37, | |
| "grad_norm": 0.2463284581899643, | |
| "learning_rate": 5.1902512562814075e-06, | |
| "loss": 0.0008, | |
| "step": 48375 | |
| }, | |
| { | |
| "epoch": 24.38, | |
| "grad_norm": 0.2767968773841858, | |
| "learning_rate": 5.187738693467337e-06, | |
| "loss": 0.001, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "grad_norm": 0.46523067355155945, | |
| "learning_rate": 5.185226130653267e-06, | |
| "loss": 0.0009, | |
| "step": 48425 | |
| }, | |
| { | |
| "epoch": 24.41, | |
| "grad_norm": 0.23646961152553558, | |
| "learning_rate": 5.182713567839196e-06, | |
| "loss": 0.0009, | |
| "step": 48450 | |
| }, | |
| { | |
| "epoch": 24.42, | |
| "grad_norm": 0.6883164048194885, | |
| "learning_rate": 5.1802010050251265e-06, | |
| "loss": 0.0007, | |
| "step": 48475 | |
| }, | |
| { | |
| "epoch": 24.43, | |
| "grad_norm": 0.33344462513923645, | |
| "learning_rate": 5.177688442211055e-06, | |
| "loss": 0.0009, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 24.45, | |
| "grad_norm": 0.1479116529226303, | |
| "learning_rate": 5.175175879396985e-06, | |
| "loss": 0.0007, | |
| "step": 48525 | |
| }, | |
| { | |
| "epoch": 24.46, | |
| "grad_norm": 1.2230092287063599, | |
| "learning_rate": 5.172663316582915e-06, | |
| "loss": 0.0008, | |
| "step": 48550 | |
| }, | |
| { | |
| "epoch": 24.47, | |
| "grad_norm": 0.17954891920089722, | |
| "learning_rate": 5.170150753768845e-06, | |
| "loss": 0.0009, | |
| "step": 48575 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "grad_norm": 0.7569608092308044, | |
| "learning_rate": 5.167638190954775e-06, | |
| "loss": 0.0007, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 24.5, | |
| "grad_norm": 0.9554746747016907, | |
| "learning_rate": 5.165125628140703e-06, | |
| "loss": 0.0009, | |
| "step": 48625 | |
| }, | |
| { | |
| "epoch": 24.51, | |
| "grad_norm": 0.6516941785812378, | |
| "learning_rate": 5.1626130653266335e-06, | |
| "loss": 0.0009, | |
| "step": 48650 | |
| }, | |
| { | |
| "epoch": 24.52, | |
| "grad_norm": 0.6526229381561279, | |
| "learning_rate": 5.160100502512563e-06, | |
| "loss": 0.001, | |
| "step": 48675 | |
| }, | |
| { | |
| "epoch": 24.53, | |
| "grad_norm": 0.17622053623199463, | |
| "learning_rate": 5.157587939698493e-06, | |
| "loss": 0.0007, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 24.55, | |
| "grad_norm": 0.8113358616828918, | |
| "learning_rate": 5.155075376884423e-06, | |
| "loss": 0.0009, | |
| "step": 48725 | |
| }, | |
| { | |
| "epoch": 24.56, | |
| "grad_norm": 0.7531803846359253, | |
| "learning_rate": 5.1525628140703525e-06, | |
| "loss": 0.0008, | |
| "step": 48750 | |
| }, | |
| { | |
| "epoch": 24.57, | |
| "grad_norm": 0.82627934217453, | |
| "learning_rate": 5.150050251256283e-06, | |
| "loss": 0.0007, | |
| "step": 48775 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "grad_norm": 1.1042143106460571, | |
| "learning_rate": 5.147537688442211e-06, | |
| "loss": 0.0007, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 24.6, | |
| "grad_norm": 0.34287524223327637, | |
| "learning_rate": 5.145025125628141e-06, | |
| "loss": 0.0006, | |
| "step": 48825 | |
| }, | |
| { | |
| "epoch": 24.61, | |
| "grad_norm": 1.404876470565796, | |
| "learning_rate": 5.142512562814071e-06, | |
| "loss": 0.001, | |
| "step": 48850 | |
| }, | |
| { | |
| "epoch": 24.62, | |
| "grad_norm": 0.3671759068965912, | |
| "learning_rate": 5.140000000000001e-06, | |
| "loss": 0.0009, | |
| "step": 48875 | |
| }, | |
| { | |
| "epoch": 24.63, | |
| "grad_norm": 0.7027815580368042, | |
| "learning_rate": 5.137487437185929e-06, | |
| "loss": 0.0009, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "grad_norm": 0.8009297847747803, | |
| "learning_rate": 5.134974874371859e-06, | |
| "loss": 0.001, | |
| "step": 48925 | |
| }, | |
| { | |
| "epoch": 24.66, | |
| "grad_norm": 1.0700089931488037, | |
| "learning_rate": 5.13246231155779e-06, | |
| "loss": 0.001, | |
| "step": 48950 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "grad_norm": 0.4704926609992981, | |
| "learning_rate": 5.129949748743719e-06, | |
| "loss": 0.0014, | |
| "step": 48975 | |
| }, | |
| { | |
| "epoch": 24.69, | |
| "grad_norm": 0.2424398958683014, | |
| "learning_rate": 5.127437185929649e-06, | |
| "loss": 0.0012, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 24.69, | |
| "eval_loss": 0.3495071828365326, | |
| "eval_runtime": 650.7727, | |
| "eval_samples_per_second": 2.165, | |
| "eval_steps_per_second": 2.165, | |
| "eval_wer": 22.87097890003459, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 24.7, | |
| "grad_norm": 0.5802572965621948, | |
| "learning_rate": 5.124924623115578e-06, | |
| "loss": 0.001, | |
| "step": 49025 | |
| }, | |
| { | |
| "epoch": 24.71, | |
| "grad_norm": 0.9147945046424866, | |
| "learning_rate": 5.1224120603015086e-06, | |
| "loss": 0.001, | |
| "step": 49050 | |
| }, | |
| { | |
| "epoch": 24.72, | |
| "grad_norm": 0.9649335145950317, | |
| "learning_rate": 5.119899497487437e-06, | |
| "loss": 0.0014, | |
| "step": 49075 | |
| }, | |
| { | |
| "epoch": 24.74, | |
| "grad_norm": 0.6083235144615173, | |
| "learning_rate": 5.117386934673367e-06, | |
| "loss": 0.0009, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 24.75, | |
| "grad_norm": 0.29622915387153625, | |
| "learning_rate": 5.1148743718592965e-06, | |
| "loss": 0.0015, | |
| "step": 49125 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "grad_norm": 0.1976325958967209, | |
| "learning_rate": 5.112361809045227e-06, | |
| "loss": 0.0012, | |
| "step": 49150 | |
| }, | |
| { | |
| "epoch": 24.77, | |
| "grad_norm": 0.6826661825180054, | |
| "learning_rate": 5.109849246231157e-06, | |
| "loss": 0.0012, | |
| "step": 49175 | |
| }, | |
| { | |
| "epoch": 24.79, | |
| "grad_norm": 1.7492895126342773, | |
| "learning_rate": 5.107336683417085e-06, | |
| "loss": 0.0011, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "grad_norm": 1.210092306137085, | |
| "learning_rate": 5.1048241206030155e-06, | |
| "loss": 0.0009, | |
| "step": 49225 | |
| }, | |
| { | |
| "epoch": 24.81, | |
| "grad_norm": 0.6438060998916626, | |
| "learning_rate": 5.102311557788945e-06, | |
| "loss": 0.0007, | |
| "step": 49250 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "grad_norm": 1.0905343294143677, | |
| "learning_rate": 5.099798994974875e-06, | |
| "loss": 0.0009, | |
| "step": 49275 | |
| }, | |
| { | |
| "epoch": 24.84, | |
| "grad_norm": 0.9838513135910034, | |
| "learning_rate": 5.097286432160804e-06, | |
| "loss": 0.0008, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 24.85, | |
| "grad_norm": 0.906428337097168, | |
| "learning_rate": 5.0947738693467345e-06, | |
| "loss": 0.0006, | |
| "step": 49325 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "grad_norm": 0.5822015404701233, | |
| "learning_rate": 5.092261306532665e-06, | |
| "loss": 0.001, | |
| "step": 49350 | |
| }, | |
| { | |
| "epoch": 24.87, | |
| "grad_norm": 0.6224560141563416, | |
| "learning_rate": 5.089748743718593e-06, | |
| "loss": 0.0008, | |
| "step": 49375 | |
| }, | |
| { | |
| "epoch": 24.89, | |
| "grad_norm": 0.12286537140607834, | |
| "learning_rate": 5.087236180904523e-06, | |
| "loss": 0.0008, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 24.9, | |
| "grad_norm": 0.958941638469696, | |
| "learning_rate": 5.084723618090453e-06, | |
| "loss": 0.0008, | |
| "step": 49425 | |
| }, | |
| { | |
| "epoch": 24.91, | |
| "grad_norm": 0.3192073106765747, | |
| "learning_rate": 5.082211055276383e-06, | |
| "loss": 0.0009, | |
| "step": 49450 | |
| }, | |
| { | |
| "epoch": 24.92, | |
| "grad_norm": null, | |
| "learning_rate": 5.079798994974875e-06, | |
| "loss": 0.0009, | |
| "step": 49475 | |
| }, | |
| { | |
| "epoch": 24.94, | |
| "grad_norm": 0.13195385038852692, | |
| "learning_rate": 5.077286432160804e-06, | |
| "loss": 0.0008, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "grad_norm": 1.2665826082229614, | |
| "learning_rate": 5.074773869346734e-06, | |
| "loss": 0.0009, | |
| "step": 49525 | |
| }, | |
| { | |
| "epoch": 24.96, | |
| "grad_norm": 0.7236266732215881, | |
| "learning_rate": 5.072261306532664e-06, | |
| "loss": 0.0009, | |
| "step": 49550 | |
| }, | |
| { | |
| "epoch": 24.97, | |
| "grad_norm": 1.4165126085281372, | |
| "learning_rate": 5.069748743718593e-06, | |
| "loss": 0.0011, | |
| "step": 49575 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "grad_norm": 0.6968191266059875, | |
| "learning_rate": 5.0672361809045235e-06, | |
| "loss": 0.0011, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.3853646218776703, | |
| "learning_rate": 5.064723618090453e-06, | |
| "loss": 0.001, | |
| "step": 49625 | |
| }, | |
| { | |
| "epoch": 25.01, | |
| "grad_norm": 1.0228655338287354, | |
| "learning_rate": 5.062211055276382e-06, | |
| "loss": 0.0006, | |
| "step": 49650 | |
| }, | |
| { | |
| "epoch": 25.03, | |
| "grad_norm": 0.07551419734954834, | |
| "learning_rate": 5.0596984924623115e-06, | |
| "loss": 0.0009, | |
| "step": 49675 | |
| }, | |
| { | |
| "epoch": 25.04, | |
| "grad_norm": 0.9682655334472656, | |
| "learning_rate": 5.057185929648242e-06, | |
| "loss": 0.0009, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "grad_norm": 0.4476355016231537, | |
| "learning_rate": 5.054673366834171e-06, | |
| "loss": 0.0008, | |
| "step": 49725 | |
| }, | |
| { | |
| "epoch": 25.06, | |
| "grad_norm": 0.44491493701934814, | |
| "learning_rate": 5.052160804020101e-06, | |
| "loss": 0.0009, | |
| "step": 49750 | |
| }, | |
| { | |
| "epoch": 25.08, | |
| "grad_norm": 1.1377488374710083, | |
| "learning_rate": 5.049648241206031e-06, | |
| "loss": 0.0007, | |
| "step": 49775 | |
| }, | |
| { | |
| "epoch": 25.09, | |
| "grad_norm": 0.2252766638994217, | |
| "learning_rate": 5.04713567839196e-06, | |
| "loss": 0.0006, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 25.1, | |
| "grad_norm": 0.3592166006565094, | |
| "learning_rate": 5.04462311557789e-06, | |
| "loss": 0.0008, | |
| "step": 49825 | |
| }, | |
| { | |
| "epoch": 25.11, | |
| "grad_norm": 0.2291008085012436, | |
| "learning_rate": 5.042110552763819e-06, | |
| "loss": 0.0005, | |
| "step": 49850 | |
| }, | |
| { | |
| "epoch": 25.13, | |
| "grad_norm": 0.36598822474479675, | |
| "learning_rate": 5.0395979899497495e-06, | |
| "loss": 0.0004, | |
| "step": 49875 | |
| }, | |
| { | |
| "epoch": 25.14, | |
| "grad_norm": 0.13307011127471924, | |
| "learning_rate": 5.037085427135679e-06, | |
| "loss": 0.0006, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 25.15, | |
| "grad_norm": 0.5685657858848572, | |
| "learning_rate": 5.034572864321608e-06, | |
| "loss": 0.0007, | |
| "step": 49925 | |
| }, | |
| { | |
| "epoch": 25.16, | |
| "grad_norm": 0.836059033870697, | |
| "learning_rate": 5.0320603015075374e-06, | |
| "loss": 0.0008, | |
| "step": 49950 | |
| }, | |
| { | |
| "epoch": 25.18, | |
| "grad_norm": 0.8621478080749512, | |
| "learning_rate": 5.029547738693468e-06, | |
| "loss": 0.0007, | |
| "step": 49975 | |
| }, | |
| { | |
| "epoch": 25.19, | |
| "grad_norm": 0.750243067741394, | |
| "learning_rate": 5.027035175879398e-06, | |
| "loss": 0.0007, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 25.19, | |
| "eval_loss": 0.3517380654811859, | |
| "eval_runtime": 645.2524, | |
| "eval_samples_per_second": 2.184, | |
| "eval_steps_per_second": 2.184, | |
| "eval_wer": 23.189207886544448, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "grad_norm": 0.194803848862648, | |
| "learning_rate": 5.024522613065327e-06, | |
| "loss": 0.0007, | |
| "step": 50025 | |
| }, | |
| { | |
| "epoch": 25.21, | |
| "grad_norm": 0.9492383599281311, | |
| "learning_rate": 5.022010050251257e-06, | |
| "loss": 0.0008, | |
| "step": 50050 | |
| }, | |
| { | |
| "epoch": 25.23, | |
| "grad_norm": 0.3268399238586426, | |
| "learning_rate": 5.019497487437186e-06, | |
| "loss": 0.0005, | |
| "step": 50075 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "grad_norm": 0.23588715493679047, | |
| "learning_rate": 5.016984924623116e-06, | |
| "loss": 0.0005, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 25.25, | |
| "grad_norm": 0.3195103704929352, | |
| "learning_rate": 5.014472361809045e-06, | |
| "loss": 0.0006, | |
| "step": 50125 | |
| }, | |
| { | |
| "epoch": 25.26, | |
| "grad_norm": 0.5224353075027466, | |
| "learning_rate": 5.011959798994975e-06, | |
| "loss": 0.0005, | |
| "step": 50150 | |
| }, | |
| { | |
| "epoch": 25.28, | |
| "grad_norm": 0.3808083236217499, | |
| "learning_rate": 5.009447236180906e-06, | |
| "loss": 0.0006, | |
| "step": 50175 | |
| }, | |
| { | |
| "epoch": 25.29, | |
| "grad_norm": 0.8282648921012878, | |
| "learning_rate": 5.006934673366834e-06, | |
| "loss": 0.0005, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 25.3, | |
| "grad_norm": 1.0018339157104492, | |
| "learning_rate": 5.004422110552764e-06, | |
| "loss": 0.0005, | |
| "step": 50225 | |
| }, | |
| { | |
| "epoch": 25.31, | |
| "grad_norm": 0.6774661540985107, | |
| "learning_rate": 5.0019095477386935e-06, | |
| "loss": 0.0007, | |
| "step": 50250 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "grad_norm": 0.9828543663024902, | |
| "learning_rate": 4.999396984924624e-06, | |
| "loss": 0.0008, | |
| "step": 50275 | |
| }, | |
| { | |
| "epoch": 25.34, | |
| "grad_norm": 0.5815138220787048, | |
| "learning_rate": 4.996884422110553e-06, | |
| "loss": 0.0007, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 25.35, | |
| "grad_norm": 0.20348763465881348, | |
| "learning_rate": 4.994371859296483e-06, | |
| "loss": 0.001, | |
| "step": 50325 | |
| }, | |
| { | |
| "epoch": 25.37, | |
| "grad_norm": 0.4329879879951477, | |
| "learning_rate": 4.9918592964824125e-06, | |
| "loss": 0.0013, | |
| "step": 50350 | |
| }, | |
| { | |
| "epoch": 25.38, | |
| "grad_norm": 0.5430723428726196, | |
| "learning_rate": 4.989346733668342e-06, | |
| "loss": 0.0008, | |
| "step": 50375 | |
| }, | |
| { | |
| "epoch": 25.39, | |
| "grad_norm": 1.5767520666122437, | |
| "learning_rate": 4.986834170854272e-06, | |
| "loss": 0.001, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 25.4, | |
| "grad_norm": 0.2442205548286438, | |
| "learning_rate": 4.984321608040201e-06, | |
| "loss": 0.0005, | |
| "step": 50425 | |
| }, | |
| { | |
| "epoch": 25.42, | |
| "grad_norm": 0.20690348744392395, | |
| "learning_rate": 4.981809045226131e-06, | |
| "loss": 0.0009, | |
| "step": 50450 | |
| }, | |
| { | |
| "epoch": 25.43, | |
| "grad_norm": 0.6699907183647156, | |
| "learning_rate": 4.979296482412061e-06, | |
| "loss": 0.0007, | |
| "step": 50475 | |
| }, | |
| { | |
| "epoch": 25.44, | |
| "grad_norm": 1.3233988285064697, | |
| "learning_rate": 4.97678391959799e-06, | |
| "loss": 0.0008, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 25.45, | |
| "grad_norm": 0.44620636105537415, | |
| "learning_rate": 4.97427135678392e-06, | |
| "loss": 0.0006, | |
| "step": 50525 | |
| }, | |
| { | |
| "epoch": 25.47, | |
| "grad_norm": 0.28157714009284973, | |
| "learning_rate": 4.97175879396985e-06, | |
| "loss": 0.0007, | |
| "step": 50550 | |
| }, | |
| { | |
| "epoch": 25.48, | |
| "grad_norm": 0.821960985660553, | |
| "learning_rate": 4.969246231155779e-06, | |
| "loss": 0.0006, | |
| "step": 50575 | |
| }, | |
| { | |
| "epoch": 25.49, | |
| "grad_norm": 0.5584134459495544, | |
| "learning_rate": 4.966733668341709e-06, | |
| "loss": 0.0007, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "grad_norm": 0.801690399646759, | |
| "learning_rate": 4.9642211055276385e-06, | |
| "loss": 0.0006, | |
| "step": 50625 | |
| }, | |
| { | |
| "epoch": 25.52, | |
| "grad_norm": 1.2869949340820312, | |
| "learning_rate": 4.961708542713568e-06, | |
| "loss": 0.0008, | |
| "step": 50650 | |
| }, | |
| { | |
| "epoch": 25.53, | |
| "grad_norm": 0.13358135521411896, | |
| "learning_rate": 4.959195979899498e-06, | |
| "loss": 0.0006, | |
| "step": 50675 | |
| }, | |
| { | |
| "epoch": 25.54, | |
| "grad_norm": 0.5842620134353638, | |
| "learning_rate": 4.956683417085428e-06, | |
| "loss": 0.0008, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 25.55, | |
| "grad_norm": 0.3312392234802246, | |
| "learning_rate": 4.9541708542713575e-06, | |
| "loss": 0.0007, | |
| "step": 50725 | |
| }, | |
| { | |
| "epoch": 25.57, | |
| "grad_norm": 0.9442441463470459, | |
| "learning_rate": 4.951658291457287e-06, | |
| "loss": 0.0007, | |
| "step": 50750 | |
| }, | |
| { | |
| "epoch": 25.58, | |
| "grad_norm": 0.8669309616088867, | |
| "learning_rate": 4.949145728643216e-06, | |
| "loss": 0.0009, | |
| "step": 50775 | |
| }, | |
| { | |
| "epoch": 25.59, | |
| "grad_norm": 1.1537563800811768, | |
| "learning_rate": 4.946633165829146e-06, | |
| "loss": 0.0007, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "grad_norm": 0.9006689190864563, | |
| "learning_rate": 4.944120603015076e-06, | |
| "loss": 0.0006, | |
| "step": 50825 | |
| }, | |
| { | |
| "epoch": 25.62, | |
| "grad_norm": 0.2254357486963272, | |
| "learning_rate": 4.941608040201005e-06, | |
| "loss": 0.0008, | |
| "step": 50850 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "grad_norm": 0.6781788468360901, | |
| "learning_rate": 4.939095477386935e-06, | |
| "loss": 0.0009, | |
| "step": 50875 | |
| }, | |
| { | |
| "epoch": 25.64, | |
| "grad_norm": 0.49428144097328186, | |
| "learning_rate": 4.936582914572865e-06, | |
| "loss": 0.0007, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 25.65, | |
| "grad_norm": 0.15863998234272003, | |
| "learning_rate": 4.934070351758795e-06, | |
| "loss": 0.0007, | |
| "step": 50925 | |
| }, | |
| { | |
| "epoch": 25.67, | |
| "grad_norm": 1.1742552518844604, | |
| "learning_rate": 4.931557788944724e-06, | |
| "loss": 0.001, | |
| "step": 50950 | |
| }, | |
| { | |
| "epoch": 25.68, | |
| "grad_norm": 0.7124210596084595, | |
| "learning_rate": 4.929045226130654e-06, | |
| "loss": 0.0007, | |
| "step": 50975 | |
| }, | |
| { | |
| "epoch": 25.69, | |
| "grad_norm": 0.11829496920108795, | |
| "learning_rate": 4.926532663316583e-06, | |
| "loss": 0.0007, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 25.69, | |
| "eval_loss": 0.35887035727500916, | |
| "eval_runtime": 648.5707, | |
| "eval_samples_per_second": 2.172, | |
| "eval_steps_per_second": 2.172, | |
| "eval_wer": 22.656520235212728, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 25.71, | |
| "grad_norm": 1.2925130128860474, | |
| "learning_rate": 4.924020100502513e-06, | |
| "loss": 0.0009, | |
| "step": 51025 | |
| }, | |
| { | |
| "epoch": 25.72, | |
| "grad_norm": 0.9622329473495483, | |
| "learning_rate": 4.921507537688442e-06, | |
| "loss": 0.001, | |
| "step": 51050 | |
| }, | |
| { | |
| "epoch": 25.73, | |
| "grad_norm": 0.15050731599330902, | |
| "learning_rate": 4.918994974874372e-06, | |
| "loss": 0.0009, | |
| "step": 51075 | |
| }, | |
| { | |
| "epoch": 25.74, | |
| "grad_norm": 0.6646810173988342, | |
| "learning_rate": 4.9164824120603015e-06, | |
| "loss": 0.0008, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 25.76, | |
| "grad_norm": 1.8066281080245972, | |
| "learning_rate": 4.913969849246232e-06, | |
| "loss": 0.0008, | |
| "step": 51125 | |
| }, | |
| { | |
| "epoch": 25.77, | |
| "grad_norm": 1.1246289014816284, | |
| "learning_rate": 4.911457286432161e-06, | |
| "loss": 0.0007, | |
| "step": 51150 | |
| }, | |
| { | |
| "epoch": 25.78, | |
| "grad_norm": 0.20243453979492188, | |
| "learning_rate": 4.908944723618091e-06, | |
| "loss": 0.0009, | |
| "step": 51175 | |
| }, | |
| { | |
| "epoch": 25.79, | |
| "grad_norm": 1.6287262439727783, | |
| "learning_rate": 4.9064321608040205e-06, | |
| "loss": 0.0006, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 25.81, | |
| "grad_norm": 0.7835111618041992, | |
| "learning_rate": 4.90391959798995e-06, | |
| "loss": 0.0005, | |
| "step": 51225 | |
| }, | |
| { | |
| "epoch": 25.82, | |
| "grad_norm": 0.7937703132629395, | |
| "learning_rate": 4.90140703517588e-06, | |
| "loss": 0.0008, | |
| "step": 51250 | |
| }, | |
| { | |
| "epoch": 25.83, | |
| "grad_norm": 0.4282771944999695, | |
| "learning_rate": 4.898894472361809e-06, | |
| "loss": 0.0009, | |
| "step": 51275 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "grad_norm": 0.9795933365821838, | |
| "learning_rate": 4.896381909547739e-06, | |
| "loss": 0.0006, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 25.86, | |
| "grad_norm": 0.2915053963661194, | |
| "learning_rate": 4.893869346733669e-06, | |
| "loss": 0.0007, | |
| "step": 51325 | |
| }, | |
| { | |
| "epoch": 25.87, | |
| "grad_norm": 0.899832010269165, | |
| "learning_rate": 4.891356783919598e-06, | |
| "loss": 0.0007, | |
| "step": 51350 | |
| }, | |
| { | |
| "epoch": 25.88, | |
| "grad_norm": 0.8909317851066589, | |
| "learning_rate": 4.888844221105528e-06, | |
| "loss": 0.0008, | |
| "step": 51375 | |
| }, | |
| { | |
| "epoch": 25.89, | |
| "grad_norm": 0.46953698992729187, | |
| "learning_rate": 4.886331658291458e-06, | |
| "loss": 0.0009, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 25.91, | |
| "grad_norm": 0.1676747351884842, | |
| "learning_rate": 4.883819095477387e-06, | |
| "loss": 0.0007, | |
| "step": 51425 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "grad_norm": 1.4294859170913696, | |
| "learning_rate": 4.881306532663317e-06, | |
| "loss": 0.0007, | |
| "step": 51450 | |
| }, | |
| { | |
| "epoch": 25.93, | |
| "grad_norm": 1.2342486381530762, | |
| "learning_rate": 4.8787939698492465e-06, | |
| "loss": 0.0006, | |
| "step": 51475 | |
| }, | |
| { | |
| "epoch": 25.94, | |
| "grad_norm": 2.795978307723999, | |
| "learning_rate": 4.876281407035176e-06, | |
| "loss": 0.0009, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 25.96, | |
| "grad_norm": 0.16276022791862488, | |
| "learning_rate": 4.873768844221106e-06, | |
| "loss": 0.0005, | |
| "step": 51525 | |
| }, | |
| { | |
| "epoch": 25.97, | |
| "grad_norm": 0.4440419673919678, | |
| "learning_rate": 4.871256281407036e-06, | |
| "loss": 0.0007, | |
| "step": 51550 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "grad_norm": 0.5978400707244873, | |
| "learning_rate": 4.8687437185929655e-06, | |
| "loss": 0.0007, | |
| "step": 51575 | |
| }, | |
| { | |
| "epoch": 25.99, | |
| "grad_norm": 0.41632863879203796, | |
| "learning_rate": 4.866231155778895e-06, | |
| "loss": 0.0008, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 26.01, | |
| "grad_norm": 0.6219983100891113, | |
| "learning_rate": 4.863718592964824e-06, | |
| "loss": 0.0005, | |
| "step": 51625 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "grad_norm": 1.240051507949829, | |
| "learning_rate": 4.861206030150754e-06, | |
| "loss": 0.0009, | |
| "step": 51650 | |
| }, | |
| { | |
| "epoch": 26.03, | |
| "grad_norm": 0.7890909314155579, | |
| "learning_rate": 4.858793969849247e-06, | |
| "loss": 0.0006, | |
| "step": 51675 | |
| }, | |
| { | |
| "epoch": 26.05, | |
| "grad_norm": 0.1678403913974762, | |
| "learning_rate": 4.856281407035176e-06, | |
| "loss": 0.0006, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 26.06, | |
| "grad_norm": 0.5384219884872437, | |
| "learning_rate": 4.853768844221106e-06, | |
| "loss": 0.0006, | |
| "step": 51725 | |
| }, | |
| { | |
| "epoch": 26.07, | |
| "grad_norm": 0.4800323247909546, | |
| "learning_rate": 4.8512562814070355e-06, | |
| "loss": 0.0008, | |
| "step": 51750 | |
| }, | |
| { | |
| "epoch": 26.08, | |
| "grad_norm": 0.088472880423069, | |
| "learning_rate": 4.848743718592966e-06, | |
| "loss": 0.0007, | |
| "step": 51775 | |
| }, | |
| { | |
| "epoch": 26.1, | |
| "grad_norm": 0.28733333945274353, | |
| "learning_rate": 4.846231155778895e-06, | |
| "loss": 0.0006, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 26.11, | |
| "grad_norm": 3.326415777206421, | |
| "learning_rate": 4.843718592964824e-06, | |
| "loss": 0.0004, | |
| "step": 51825 | |
| }, | |
| { | |
| "epoch": 26.12, | |
| "grad_norm": 0.8401397466659546, | |
| "learning_rate": 4.8412060301507545e-06, | |
| "loss": 0.0004, | |
| "step": 51850 | |
| }, | |
| { | |
| "epoch": 26.13, | |
| "grad_norm": 1.742140531539917, | |
| "learning_rate": 4.838693467336684e-06, | |
| "loss": 0.0005, | |
| "step": 51875 | |
| }, | |
| { | |
| "epoch": 26.15, | |
| "grad_norm": 1.9404064416885376, | |
| "learning_rate": 4.836180904522613e-06, | |
| "loss": 0.0005, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 26.16, | |
| "grad_norm": 0.9949504733085632, | |
| "learning_rate": 4.833668341708543e-06, | |
| "loss": 0.0005, | |
| "step": 51925 | |
| }, | |
| { | |
| "epoch": 26.17, | |
| "grad_norm": 0.13971653580665588, | |
| "learning_rate": 4.831155778894473e-06, | |
| "loss": 0.0006, | |
| "step": 51950 | |
| }, | |
| { | |
| "epoch": 26.18, | |
| "grad_norm": 0.31128737330436707, | |
| "learning_rate": 4.828643216080403e-06, | |
| "loss": 0.0005, | |
| "step": 51975 | |
| }, | |
| { | |
| "epoch": 26.2, | |
| "grad_norm": 0.8576771020889282, | |
| "learning_rate": 4.826130653266332e-06, | |
| "loss": 0.0006, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 26.2, | |
| "eval_loss": 0.354295015335083, | |
| "eval_runtime": 645.4254, | |
| "eval_samples_per_second": 2.183, | |
| "eval_steps_per_second": 2.183, | |
| "eval_wer": 22.960913178830854, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 26.21, | |
| "grad_norm": 0.8186060786247253, | |
| "learning_rate": 4.8236180904522614e-06, | |
| "loss": 0.0006, | |
| "step": 52025 | |
| }, | |
| { | |
| "epoch": 26.22, | |
| "grad_norm": 0.21395763754844666, | |
| "learning_rate": 4.821105527638192e-06, | |
| "loss": 0.0009, | |
| "step": 52050 | |
| }, | |
| { | |
| "epoch": 26.23, | |
| "grad_norm": 0.28404101729393005, | |
| "learning_rate": 4.818592964824121e-06, | |
| "loss": 0.0005, | |
| "step": 52075 | |
| }, | |
| { | |
| "epoch": 26.25, | |
| "grad_norm": 0.2926516830921173, | |
| "learning_rate": 4.81608040201005e-06, | |
| "loss": 0.0005, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "grad_norm": 0.8246799111366272, | |
| "learning_rate": 4.81356783919598e-06, | |
| "loss": 0.0005, | |
| "step": 52125 | |
| }, | |
| { | |
| "epoch": 26.27, | |
| "grad_norm": 0.19814717769622803, | |
| "learning_rate": 4.81105527638191e-06, | |
| "loss": 0.0007, | |
| "step": 52150 | |
| }, | |
| { | |
| "epoch": 26.28, | |
| "grad_norm": 0.2942245602607727, | |
| "learning_rate": 4.80854271356784e-06, | |
| "loss": 0.0006, | |
| "step": 52175 | |
| }, | |
| { | |
| "epoch": 26.3, | |
| "grad_norm": 1.1679245233535767, | |
| "learning_rate": 4.806030150753769e-06, | |
| "loss": 0.0007, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 26.31, | |
| "grad_norm": 0.19946590065956116, | |
| "learning_rate": 4.8035175879396986e-06, | |
| "loss": 0.0007, | |
| "step": 52225 | |
| }, | |
| { | |
| "epoch": 26.32, | |
| "grad_norm": 0.5419365763664246, | |
| "learning_rate": 4.801005025125629e-06, | |
| "loss": 0.0004, | |
| "step": 52250 | |
| }, | |
| { | |
| "epoch": 26.34, | |
| "grad_norm": 1.0101172924041748, | |
| "learning_rate": 4.798492462311558e-06, | |
| "loss": 0.0008, | |
| "step": 52275 | |
| }, | |
| { | |
| "epoch": 26.35, | |
| "grad_norm": 0.7043997049331665, | |
| "learning_rate": 4.795979899497487e-06, | |
| "loss": 0.0005, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 26.36, | |
| "grad_norm": 1.9109712839126587, | |
| "learning_rate": 4.7934673366834175e-06, | |
| "loss": 0.0005, | |
| "step": 52325 | |
| }, | |
| { | |
| "epoch": 26.37, | |
| "grad_norm": 1.100644826889038, | |
| "learning_rate": 4.790954773869348e-06, | |
| "loss": 0.0009, | |
| "step": 52350 | |
| }, | |
| { | |
| "epoch": 26.39, | |
| "grad_norm": 0.7146306037902832, | |
| "learning_rate": 4.788442211055277e-06, | |
| "loss": 0.0007, | |
| "step": 52375 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "grad_norm": 0.21567130088806152, | |
| "learning_rate": 4.785929648241206e-06, | |
| "loss": 0.0007, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 26.41, | |
| "grad_norm": 1.087694764137268, | |
| "learning_rate": 4.7834170854271365e-06, | |
| "loss": 0.001, | |
| "step": 52425 | |
| }, | |
| { | |
| "epoch": 26.42, | |
| "grad_norm": 0.6008167862892151, | |
| "learning_rate": 4.780904522613066e-06, | |
| "loss": 0.001, | |
| "step": 52450 | |
| }, | |
| { | |
| "epoch": 26.44, | |
| "grad_norm": 0.45899203419685364, | |
| "learning_rate": 4.778391959798995e-06, | |
| "loss": 0.0006, | |
| "step": 52475 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "grad_norm": 0.5328181982040405, | |
| "learning_rate": 4.7758793969849245e-06, | |
| "loss": 0.0009, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 26.46, | |
| "grad_norm": 0.1914442628622055, | |
| "learning_rate": 4.773366834170855e-06, | |
| "loss": 0.001, | |
| "step": 52525 | |
| }, | |
| { | |
| "epoch": 26.47, | |
| "grad_norm": 0.6430690884590149, | |
| "learning_rate": 4.770854271356785e-06, | |
| "loss": 0.0009, | |
| "step": 52550 | |
| }, | |
| { | |
| "epoch": 26.49, | |
| "grad_norm": 0.5766837000846863, | |
| "learning_rate": 4.768341708542714e-06, | |
| "loss": 0.0009, | |
| "step": 52575 | |
| }, | |
| { | |
| "epoch": 26.5, | |
| "grad_norm": 0.6997876763343811, | |
| "learning_rate": 4.7658291457286435e-06, | |
| "loss": 0.0007, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 26.51, | |
| "grad_norm": 1.365090250968933, | |
| "learning_rate": 4.763316582914574e-06, | |
| "loss": 0.001, | |
| "step": 52625 | |
| }, | |
| { | |
| "epoch": 26.52, | |
| "grad_norm": 1.0649492740631104, | |
| "learning_rate": 4.760804020100503e-06, | |
| "loss": 0.0009, | |
| "step": 52650 | |
| }, | |
| { | |
| "epoch": 26.54, | |
| "grad_norm": 1.1800321340560913, | |
| "learning_rate": 4.758291457286432e-06, | |
| "loss": 0.001, | |
| "step": 52675 | |
| }, | |
| { | |
| "epoch": 26.55, | |
| "grad_norm": 0.5055447220802307, | |
| "learning_rate": 4.7557788944723625e-06, | |
| "loss": 0.0008, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 26.56, | |
| "grad_norm": 0.6218178272247314, | |
| "learning_rate": 4.753266331658292e-06, | |
| "loss": 0.001, | |
| "step": 52725 | |
| }, | |
| { | |
| "epoch": 26.57, | |
| "grad_norm": 0.49634939432144165, | |
| "learning_rate": 4.750753768844221e-06, | |
| "loss": 0.0007, | |
| "step": 52750 | |
| }, | |
| { | |
| "epoch": 26.59, | |
| "grad_norm": 0.4345056116580963, | |
| "learning_rate": 4.748241206030151e-06, | |
| "loss": 0.0006, | |
| "step": 52775 | |
| }, | |
| { | |
| "epoch": 26.6, | |
| "grad_norm": 0.8264731168746948, | |
| "learning_rate": 4.745728643216081e-06, | |
| "loss": 0.0008, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 26.61, | |
| "grad_norm": 0.7606098651885986, | |
| "learning_rate": 4.743216080402011e-06, | |
| "loss": 0.0008, | |
| "step": 52825 | |
| }, | |
| { | |
| "epoch": 26.62, | |
| "grad_norm": 0.13701969385147095, | |
| "learning_rate": 4.74070351758794e-06, | |
| "loss": 0.0008, | |
| "step": 52850 | |
| }, | |
| { | |
| "epoch": 26.64, | |
| "grad_norm": 1.5023452043533325, | |
| "learning_rate": 4.7381909547738694e-06, | |
| "loss": 0.0008, | |
| "step": 52875 | |
| }, | |
| { | |
| "epoch": 26.65, | |
| "grad_norm": 0.7520101070404053, | |
| "learning_rate": 4.7356783919598e-06, | |
| "loss": 0.0009, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 26.66, | |
| "grad_norm": 0.5014305114746094, | |
| "learning_rate": 4.733165829145729e-06, | |
| "loss": 0.0007, | |
| "step": 52925 | |
| }, | |
| { | |
| "epoch": 26.68, | |
| "grad_norm": 0.6294064521789551, | |
| "learning_rate": 4.730653266331658e-06, | |
| "loss": 0.0009, | |
| "step": 52950 | |
| }, | |
| { | |
| "epoch": 26.69, | |
| "grad_norm": 0.1294821947813034, | |
| "learning_rate": 4.728140703517588e-06, | |
| "loss": 0.0007, | |
| "step": 52975 | |
| }, | |
| { | |
| "epoch": 26.7, | |
| "grad_norm": 0.0864739790558815, | |
| "learning_rate": 4.725628140703518e-06, | |
| "loss": 0.0009, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 26.7, | |
| "eval_loss": 0.3559441566467285, | |
| "eval_runtime": 647.2062, | |
| "eval_samples_per_second": 2.177, | |
| "eval_steps_per_second": 2.177, | |
| "eval_wer": 22.760290556900724, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 26.71, | |
| "grad_norm": 0.9976471662521362, | |
| "learning_rate": 4.723115577889448e-06, | |
| "loss": 0.0007, | |
| "step": 53025 | |
| }, | |
| { | |
| "epoch": 26.73, | |
| "grad_norm": 0.1680731177330017, | |
| "learning_rate": 4.720603015075377e-06, | |
| "loss": 0.0008, | |
| "step": 53050 | |
| }, | |
| { | |
| "epoch": 26.74, | |
| "grad_norm": 0.7187339663505554, | |
| "learning_rate": 4.7180904522613066e-06, | |
| "loss": 0.0007, | |
| "step": 53075 | |
| }, | |
| { | |
| "epoch": 26.75, | |
| "grad_norm": 2.314380645751953, | |
| "learning_rate": 4.715577889447237e-06, | |
| "loss": 0.001, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 26.76, | |
| "grad_norm": 0.23373447358608246, | |
| "learning_rate": 4.713065326633166e-06, | |
| "loss": 0.0007, | |
| "step": 53125 | |
| }, | |
| { | |
| "epoch": 26.78, | |
| "grad_norm": 0.07850030809640884, | |
| "learning_rate": 4.710552763819095e-06, | |
| "loss": 0.0008, | |
| "step": 53150 | |
| }, | |
| { | |
| "epoch": 26.79, | |
| "grad_norm": 0.7005709409713745, | |
| "learning_rate": 4.7080402010050256e-06, | |
| "loss": 0.001, | |
| "step": 53175 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "grad_norm": 1.0930556058883667, | |
| "learning_rate": 4.705527638190956e-06, | |
| "loss": 0.0006, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 26.81, | |
| "grad_norm": 0.5338262319564819, | |
| "learning_rate": 4.703015075376885e-06, | |
| "loss": 0.0005, | |
| "step": 53225 | |
| }, | |
| { | |
| "epoch": 26.83, | |
| "grad_norm": 0.08169445395469666, | |
| "learning_rate": 4.700502512562814e-06, | |
| "loss": 0.0005, | |
| "step": 53250 | |
| }, | |
| { | |
| "epoch": 26.84, | |
| "grad_norm": 2.5466127395629883, | |
| "learning_rate": 4.6979899497487445e-06, | |
| "loss": 0.0007, | |
| "step": 53275 | |
| }, | |
| { | |
| "epoch": 26.85, | |
| "grad_norm": 1.4362409114837646, | |
| "learning_rate": 4.695477386934674e-06, | |
| "loss": 0.0005, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "grad_norm": 1.0999122858047485, | |
| "learning_rate": 4.692964824120603e-06, | |
| "loss": 0.0008, | |
| "step": 53325 | |
| }, | |
| { | |
| "epoch": 26.88, | |
| "grad_norm": 0.7979241609573364, | |
| "learning_rate": 4.6904522613065325e-06, | |
| "loss": 0.0009, | |
| "step": 53350 | |
| }, | |
| { | |
| "epoch": 26.89, | |
| "grad_norm": 0.5806974172592163, | |
| "learning_rate": 4.687939698492463e-06, | |
| "loss": 0.0008, | |
| "step": 53375 | |
| }, | |
| { | |
| "epoch": 26.9, | |
| "grad_norm": 1.0148537158966064, | |
| "learning_rate": 4.685427135678393e-06, | |
| "loss": 0.0008, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 26.91, | |
| "grad_norm": 1.2113052606582642, | |
| "learning_rate": 4.682914572864322e-06, | |
| "loss": 0.0013, | |
| "step": 53425 | |
| }, | |
| { | |
| "epoch": 26.93, | |
| "grad_norm": 0.730830729007721, | |
| "learning_rate": 4.6804020100502515e-06, | |
| "loss": 0.0006, | |
| "step": 53450 | |
| }, | |
| { | |
| "epoch": 26.94, | |
| "grad_norm": 0.5694770216941833, | |
| "learning_rate": 4.677889447236182e-06, | |
| "loss": 0.0007, | |
| "step": 53475 | |
| }, | |
| { | |
| "epoch": 26.95, | |
| "grad_norm": 0.7788098454475403, | |
| "learning_rate": 4.675376884422111e-06, | |
| "loss": 0.0006, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 26.96, | |
| "grad_norm": 0.47340822219848633, | |
| "learning_rate": 4.67286432160804e-06, | |
| "loss": 0.0008, | |
| "step": 53525 | |
| }, | |
| { | |
| "epoch": 26.98, | |
| "grad_norm": 0.4103614389896393, | |
| "learning_rate": 4.6703517587939705e-06, | |
| "loss": 0.0007, | |
| "step": 53550 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "grad_norm": 1.1811082363128662, | |
| "learning_rate": 4.6678391959799e-06, | |
| "loss": 0.0006, | |
| "step": 53575 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.34475991129875183, | |
| "learning_rate": 4.66532663316583e-06, | |
| "loss": 0.0004, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 27.02, | |
| "grad_norm": 0.060179028660058975, | |
| "learning_rate": 4.662814070351759e-06, | |
| "loss": 0.0004, | |
| "step": 53625 | |
| }, | |
| { | |
| "epoch": 27.03, | |
| "grad_norm": 0.4929451048374176, | |
| "learning_rate": 4.660301507537689e-06, | |
| "loss": 0.0004, | |
| "step": 53650 | |
| }, | |
| { | |
| "epoch": 27.04, | |
| "grad_norm": 0.3733210265636444, | |
| "learning_rate": 4.657788944723619e-06, | |
| "loss": 0.0005, | |
| "step": 53675 | |
| }, | |
| { | |
| "epoch": 27.05, | |
| "grad_norm": 1.3154228925704956, | |
| "learning_rate": 4.655276381909548e-06, | |
| "loss": 0.0005, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 27.07, | |
| "grad_norm": 0.241769477725029, | |
| "learning_rate": 4.6527638190954774e-06, | |
| "loss": 0.0004, | |
| "step": 53725 | |
| }, | |
| { | |
| "epoch": 27.08, | |
| "grad_norm": 0.19261914491653442, | |
| "learning_rate": 4.650251256281408e-06, | |
| "loss": 0.0003, | |
| "step": 53750 | |
| }, | |
| { | |
| "epoch": 27.09, | |
| "grad_norm": 0.19151557981967926, | |
| "learning_rate": 4.647738693467337e-06, | |
| "loss": 0.0002, | |
| "step": 53775 | |
| }, | |
| { | |
| "epoch": 27.1, | |
| "grad_norm": 0.2657971680164337, | |
| "learning_rate": 4.645226130653266e-06, | |
| "loss": 0.0004, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 27.12, | |
| "grad_norm": 0.09917689114809036, | |
| "learning_rate": 4.6427135678391964e-06, | |
| "loss": 0.0003, | |
| "step": 53825 | |
| }, | |
| { | |
| "epoch": 27.13, | |
| "grad_norm": 0.08713795244693756, | |
| "learning_rate": 4.640201005025126e-06, | |
| "loss": 0.0002, | |
| "step": 53850 | |
| }, | |
| { | |
| "epoch": 27.14, | |
| "grad_norm": 0.15994809567928314, | |
| "learning_rate": 4.637688442211056e-06, | |
| "loss": 0.0004, | |
| "step": 53875 | |
| }, | |
| { | |
| "epoch": 27.15, | |
| "grad_norm": 0.1758558303117752, | |
| "learning_rate": 4.635175879396985e-06, | |
| "loss": 0.0004, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 27.17, | |
| "grad_norm": 0.04318870231509209, | |
| "learning_rate": 4.6326633165829146e-06, | |
| "loss": 0.0004, | |
| "step": 53925 | |
| }, | |
| { | |
| "epoch": 27.18, | |
| "grad_norm": 0.07124695926904678, | |
| "learning_rate": 4.630150753768845e-06, | |
| "loss": 0.0003, | |
| "step": 53950 | |
| }, | |
| { | |
| "epoch": 27.19, | |
| "grad_norm": 0.6460732221603394, | |
| "learning_rate": 4.627638190954774e-06, | |
| "loss": 0.0005, | |
| "step": 53975 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "grad_norm": 0.9753907322883606, | |
| "learning_rate": 4.625125628140703e-06, | |
| "loss": 0.0004, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "eval_loss": 0.3613799214363098, | |
| "eval_runtime": 644.7858, | |
| "eval_samples_per_second": 2.185, | |
| "eval_steps_per_second": 2.185, | |
| "eval_wer": 22.407471463161535, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 27.22, | |
| "grad_norm": 0.14066103100776672, | |
| "learning_rate": 4.6226130653266336e-06, | |
| "loss": 0.0005, | |
| "step": 54025 | |
| }, | |
| { | |
| "epoch": 27.23, | |
| "grad_norm": 0.20447216928005219, | |
| "learning_rate": 4.620100502512564e-06, | |
| "loss": 0.0003, | |
| "step": 54050 | |
| }, | |
| { | |
| "epoch": 27.24, | |
| "grad_norm": 0.13957104086875916, | |
| "learning_rate": 4.617587939698493e-06, | |
| "loss": 0.0002, | |
| "step": 54075 | |
| }, | |
| { | |
| "epoch": 27.25, | |
| "grad_norm": 0.8711459040641785, | |
| "learning_rate": 4.615075376884422e-06, | |
| "loss": 0.0003, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 27.27, | |
| "grad_norm": 0.19493000209331512, | |
| "learning_rate": 4.612562814070352e-06, | |
| "loss": 0.0003, | |
| "step": 54125 | |
| }, | |
| { | |
| "epoch": 27.28, | |
| "grad_norm": 0.16989558935165405, | |
| "learning_rate": 4.610050251256282e-06, | |
| "loss": 0.0004, | |
| "step": 54150 | |
| }, | |
| { | |
| "epoch": 27.29, | |
| "grad_norm": 0.29881447553634644, | |
| "learning_rate": 4.607537688442211e-06, | |
| "loss": 0.0006, | |
| "step": 54175 | |
| }, | |
| { | |
| "epoch": 27.3, | |
| "grad_norm": 1.1802809238433838, | |
| "learning_rate": 4.6050251256281405e-06, | |
| "loss": 0.0006, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 27.32, | |
| "grad_norm": 0.17507942020893097, | |
| "learning_rate": 4.602512562814071e-06, | |
| "loss": 0.0005, | |
| "step": 54225 | |
| }, | |
| { | |
| "epoch": 27.33, | |
| "grad_norm": 0.2276214212179184, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 0.0004, | |
| "step": 54250 | |
| }, | |
| { | |
| "epoch": 27.34, | |
| "grad_norm": 0.4339704215526581, | |
| "learning_rate": 4.59748743718593e-06, | |
| "loss": 0.0006, | |
| "step": 54275 | |
| }, | |
| { | |
| "epoch": 27.36, | |
| "grad_norm": 1.0907328128814697, | |
| "learning_rate": 4.5949748743718595e-06, | |
| "loss": 0.0006, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 27.37, | |
| "grad_norm": 0.9420053362846375, | |
| "learning_rate": 4.59246231155779e-06, | |
| "loss": 0.0004, | |
| "step": 54325 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "grad_norm": 0.5085152387619019, | |
| "learning_rate": 4.589949748743719e-06, | |
| "loss": 0.0006, | |
| "step": 54350 | |
| }, | |
| { | |
| "epoch": 27.39, | |
| "grad_norm": 0.7022001147270203, | |
| "learning_rate": 4.587437185929648e-06, | |
| "loss": 0.0006, | |
| "step": 54375 | |
| }, | |
| { | |
| "epoch": 27.41, | |
| "grad_norm": 0.4072805941104889, | |
| "learning_rate": 4.5849246231155785e-06, | |
| "loss": 0.0004, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "grad_norm": 0.6905921101570129, | |
| "learning_rate": 4.582412060301508e-06, | |
| "loss": 0.0007, | |
| "step": 54425 | |
| }, | |
| { | |
| "epoch": 27.43, | |
| "grad_norm": 0.5085986852645874, | |
| "learning_rate": 4.579899497487438e-06, | |
| "loss": 0.0006, | |
| "step": 54450 | |
| }, | |
| { | |
| "epoch": 27.44, | |
| "grad_norm": 0.7813529968261719, | |
| "learning_rate": 4.577386934673367e-06, | |
| "loss": 0.0006, | |
| "step": 54475 | |
| }, | |
| { | |
| "epoch": 27.46, | |
| "grad_norm": 0.7074769139289856, | |
| "learning_rate": 4.574874371859297e-06, | |
| "loss": 0.0009, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 27.47, | |
| "grad_norm": 1.1880906820297241, | |
| "learning_rate": 4.572361809045227e-06, | |
| "loss": 0.0008, | |
| "step": 54525 | |
| }, | |
| { | |
| "epoch": 27.48, | |
| "grad_norm": 1.0316381454467773, | |
| "learning_rate": 4.569849246231156e-06, | |
| "loss": 0.0009, | |
| "step": 54550 | |
| }, | |
| { | |
| "epoch": 27.49, | |
| "grad_norm": 1.5573595762252808, | |
| "learning_rate": 4.5673366834170854e-06, | |
| "loss": 0.0005, | |
| "step": 54575 | |
| }, | |
| { | |
| "epoch": 27.51, | |
| "grad_norm": 3.481981039047241, | |
| "learning_rate": 4.564824120603016e-06, | |
| "loss": 0.001, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 27.52, | |
| "grad_norm": 0.9076539874076843, | |
| "learning_rate": 4.562311557788945e-06, | |
| "loss": 0.0007, | |
| "step": 54625 | |
| }, | |
| { | |
| "epoch": 27.53, | |
| "grad_norm": 2.5089993476867676, | |
| "learning_rate": 4.559798994974875e-06, | |
| "loss": 0.0007, | |
| "step": 54650 | |
| }, | |
| { | |
| "epoch": 27.54, | |
| "grad_norm": 0.755713701248169, | |
| "learning_rate": 4.5572864321608044e-06, | |
| "loss": 0.001, | |
| "step": 54675 | |
| }, | |
| { | |
| "epoch": 27.56, | |
| "grad_norm": 0.1939065009355545, | |
| "learning_rate": 4.554773869346734e-06, | |
| "loss": 0.0007, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "grad_norm": 0.29305967688560486, | |
| "learning_rate": 4.552261306532664e-06, | |
| "loss": 0.0008, | |
| "step": 54725 | |
| }, | |
| { | |
| "epoch": 27.58, | |
| "grad_norm": 1.5190849304199219, | |
| "learning_rate": 4.549748743718593e-06, | |
| "loss": 0.0009, | |
| "step": 54750 | |
| }, | |
| { | |
| "epoch": 27.59, | |
| "grad_norm": 0.15516288578510284, | |
| "learning_rate": 4.5472361809045226e-06, | |
| "loss": 0.0007, | |
| "step": 54775 | |
| }, | |
| { | |
| "epoch": 27.61, | |
| "grad_norm": 0.9610015749931335, | |
| "learning_rate": 4.544723618090453e-06, | |
| "loss": 0.0007, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 27.62, | |
| "grad_norm": 0.04598340019583702, | |
| "learning_rate": 4.542211055276382e-06, | |
| "loss": 0.0005, | |
| "step": 54825 | |
| }, | |
| { | |
| "epoch": 27.63, | |
| "grad_norm": 0.8410586714744568, | |
| "learning_rate": 4.539698492462312e-06, | |
| "loss": 0.0006, | |
| "step": 54850 | |
| }, | |
| { | |
| "epoch": 27.64, | |
| "grad_norm": 0.09928705543279648, | |
| "learning_rate": 4.5371859296482416e-06, | |
| "loss": 0.0008, | |
| "step": 54875 | |
| }, | |
| { | |
| "epoch": 27.66, | |
| "grad_norm": 1.5359119176864624, | |
| "learning_rate": 4.534673366834172e-06, | |
| "loss": 0.0007, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 27.67, | |
| "grad_norm": 0.13479715585708618, | |
| "learning_rate": 4.532160804020101e-06, | |
| "loss": 0.0006, | |
| "step": 54925 | |
| }, | |
| { | |
| "epoch": 27.68, | |
| "grad_norm": 0.5345453023910522, | |
| "learning_rate": 4.52964824120603e-06, | |
| "loss": 0.0007, | |
| "step": 54950 | |
| }, | |
| { | |
| "epoch": 27.7, | |
| "grad_norm": 0.29486083984375, | |
| "learning_rate": 4.52713567839196e-06, | |
| "loss": 0.0012, | |
| "step": 54975 | |
| }, | |
| { | |
| "epoch": 27.71, | |
| "grad_norm": 1.310102939605713, | |
| "learning_rate": 4.52462311557789e-06, | |
| "loss": 0.0008, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 27.71, | |
| "eval_loss": 0.3659009635448456, | |
| "eval_runtime": 646.1523, | |
| "eval_samples_per_second": 2.181, | |
| "eval_steps_per_second": 2.181, | |
| "eval_wer": 22.98858526461432, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 27.72, | |
| "grad_norm": 0.18702644109725952, | |
| "learning_rate": 4.522110552763819e-06, | |
| "loss": 0.0009, | |
| "step": 55025 | |
| }, | |
| { | |
| "epoch": 27.73, | |
| "grad_norm": 0.17531992495059967, | |
| "learning_rate": 4.5195979899497485e-06, | |
| "loss": 0.0006, | |
| "step": 55050 | |
| }, | |
| { | |
| "epoch": 27.75, | |
| "grad_norm": 0.844012439250946, | |
| "learning_rate": 4.517085427135679e-06, | |
| "loss": 0.0005, | |
| "step": 55075 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "grad_norm": 0.20820151269435883, | |
| "learning_rate": 4.514572864321609e-06, | |
| "loss": 0.0008, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 27.77, | |
| "grad_norm": 2.1439287662506104, | |
| "learning_rate": 4.512060301507538e-06, | |
| "loss": 0.0009, | |
| "step": 55125 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "grad_norm": 0.31553953886032104, | |
| "learning_rate": 4.5095477386934675e-06, | |
| "loss": 0.0007, | |
| "step": 55150 | |
| }, | |
| { | |
| "epoch": 27.8, | |
| "grad_norm": 0.08655881881713867, | |
| "learning_rate": 4.507035175879398e-06, | |
| "loss": 0.0005, | |
| "step": 55175 | |
| }, | |
| { | |
| "epoch": 27.81, | |
| "grad_norm": 0.0824466124176979, | |
| "learning_rate": 4.504522613065327e-06, | |
| "loss": 0.0005, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 27.82, | |
| "grad_norm": 0.7539893984794617, | |
| "learning_rate": 4.502010050251256e-06, | |
| "loss": 0.0006, | |
| "step": 55225 | |
| }, | |
| { | |
| "epoch": 27.83, | |
| "grad_norm": 0.1819518655538559, | |
| "learning_rate": 4.499497487437186e-06, | |
| "loss": 0.0011, | |
| "step": 55250 | |
| }, | |
| { | |
| "epoch": 27.85, | |
| "grad_norm": 1.464959740638733, | |
| "learning_rate": 4.496984924623116e-06, | |
| "loss": 0.0009, | |
| "step": 55275 | |
| }, | |
| { | |
| "epoch": 27.86, | |
| "grad_norm": 0.3855617344379425, | |
| "learning_rate": 4.494472361809046e-06, | |
| "loss": 0.0008, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 27.87, | |
| "grad_norm": 1.3555275201797485, | |
| "learning_rate": 4.491959798994975e-06, | |
| "loss": 0.0012, | |
| "step": 55325 | |
| }, | |
| { | |
| "epoch": 27.88, | |
| "grad_norm": 0.7826224565505981, | |
| "learning_rate": 4.489447236180905e-06, | |
| "loss": 0.001, | |
| "step": 55350 | |
| }, | |
| { | |
| "epoch": 27.9, | |
| "grad_norm": 0.3365747630596161, | |
| "learning_rate": 4.486934673366835e-06, | |
| "loss": 0.0005, | |
| "step": 55375 | |
| }, | |
| { | |
| "epoch": 27.91, | |
| "grad_norm": 0.28341349959373474, | |
| "learning_rate": 4.484522613065327e-06, | |
| "loss": 0.0008, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 27.92, | |
| "grad_norm": 0.6870297789573669, | |
| "learning_rate": 4.4820100502512565e-06, | |
| "loss": 0.0006, | |
| "step": 55425 | |
| }, | |
| { | |
| "epoch": 27.93, | |
| "grad_norm": 0.2579886019229889, | |
| "learning_rate": 4.479497487437186e-06, | |
| "loss": 0.0008, | |
| "step": 55450 | |
| }, | |
| { | |
| "epoch": 27.95, | |
| "grad_norm": 1.2634528875350952, | |
| "learning_rate": 4.476984924623116e-06, | |
| "loss": 0.0008, | |
| "step": 55475 | |
| }, | |
| { | |
| "epoch": 27.96, | |
| "grad_norm": 0.11361195892095566, | |
| "learning_rate": 4.474472361809046e-06, | |
| "loss": 0.0006, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 27.97, | |
| "grad_norm": 0.15430136024951935, | |
| "learning_rate": 4.4719597989949755e-06, | |
| "loss": 0.0007, | |
| "step": 55525 | |
| }, | |
| { | |
| "epoch": 27.98, | |
| "grad_norm": 0.3706219494342804, | |
| "learning_rate": 4.469447236180905e-06, | |
| "loss": 0.0007, | |
| "step": 55550 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.853947103023529, | |
| "learning_rate": 4.466934673366834e-06, | |
| "loss": 0.0006, | |
| "step": 55575 | |
| }, | |
| { | |
| "epoch": 28.01, | |
| "grad_norm": 0.9473939538002014, | |
| "learning_rate": 4.464422110552764e-06, | |
| "loss": 0.0004, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 28.02, | |
| "grad_norm": 1.0833967924118042, | |
| "learning_rate": 4.461909547738694e-06, | |
| "loss": 0.0006, | |
| "step": 55625 | |
| }, | |
| { | |
| "epoch": 28.04, | |
| "grad_norm": 0.2835502028465271, | |
| "learning_rate": 4.459396984924623e-06, | |
| "loss": 0.0005, | |
| "step": 55650 | |
| }, | |
| { | |
| "epoch": 28.05, | |
| "grad_norm": 0.5609690546989441, | |
| "learning_rate": 4.456884422110553e-06, | |
| "loss": 0.0005, | |
| "step": 55675 | |
| }, | |
| { | |
| "epoch": 28.06, | |
| "grad_norm": 0.07128031551837921, | |
| "learning_rate": 4.454371859296483e-06, | |
| "loss": 0.0004, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 28.07, | |
| "grad_norm": 0.7161921858787537, | |
| "learning_rate": 4.451859296482413e-06, | |
| "loss": 0.0003, | |
| "step": 55725 | |
| }, | |
| { | |
| "epoch": 28.09, | |
| "grad_norm": 0.13013307750225067, | |
| "learning_rate": 4.449346733668342e-06, | |
| "loss": 0.0002, | |
| "step": 55750 | |
| }, | |
| { | |
| "epoch": 28.1, | |
| "grad_norm": 0.13601085543632507, | |
| "learning_rate": 4.446834170854272e-06, | |
| "loss": 0.0004, | |
| "step": 55775 | |
| }, | |
| { | |
| "epoch": 28.11, | |
| "grad_norm": 0.6023567318916321, | |
| "learning_rate": 4.4443216080402014e-06, | |
| "loss": 0.0003, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 28.12, | |
| "grad_norm": 0.09424587339162827, | |
| "learning_rate": 4.441809045226131e-06, | |
| "loss": 0.0003, | |
| "step": 55825 | |
| }, | |
| { | |
| "epoch": 28.14, | |
| "grad_norm": 0.04044128209352493, | |
| "learning_rate": 4.43929648241206e-06, | |
| "loss": 0.0003, | |
| "step": 55850 | |
| }, | |
| { | |
| "epoch": 28.15, | |
| "grad_norm": 0.16100598871707916, | |
| "learning_rate": 4.43678391959799e-06, | |
| "loss": 0.0002, | |
| "step": 55875 | |
| }, | |
| { | |
| "epoch": 28.16, | |
| "grad_norm": 0.3516765832901001, | |
| "learning_rate": 4.4342713567839204e-06, | |
| "loss": 0.0002, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 28.17, | |
| "grad_norm": 0.2532273828983307, | |
| "learning_rate": 4.43175879396985e-06, | |
| "loss": 0.0002, | |
| "step": 55925 | |
| }, | |
| { | |
| "epoch": 28.19, | |
| "grad_norm": 0.19065579771995544, | |
| "learning_rate": 4.429246231155779e-06, | |
| "loss": 0.0006, | |
| "step": 55950 | |
| }, | |
| { | |
| "epoch": 28.2, | |
| "grad_norm": 0.30931228399276733, | |
| "learning_rate": 4.426733668341709e-06, | |
| "loss": 0.0006, | |
| "step": 55975 | |
| }, | |
| { | |
| "epoch": 28.21, | |
| "grad_norm": 0.30858734250068665, | |
| "learning_rate": 4.4242211055276386e-06, | |
| "loss": 0.0003, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 28.21, | |
| "eval_loss": 0.3624221980571747, | |
| "eval_runtime": 781.7858, | |
| "eval_samples_per_second": 1.802, | |
| "eval_steps_per_second": 1.802, | |
| "eval_wer": 22.172258734002074, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "grad_norm": 0.08408491313457489, | |
| "learning_rate": 4.421708542713568e-06, | |
| "loss": 0.0002, | |
| "step": 56025 | |
| }, | |
| { | |
| "epoch": 28.24, | |
| "grad_norm": 0.06299348175525665, | |
| "learning_rate": 4.419195979899498e-06, | |
| "loss": 0.0005, | |
| "step": 56050 | |
| }, | |
| { | |
| "epoch": 28.25, | |
| "grad_norm": 0.07827432453632355, | |
| "learning_rate": 4.416683417085427e-06, | |
| "loss": 0.0004, | |
| "step": 56075 | |
| }, | |
| { | |
| "epoch": 28.26, | |
| "grad_norm": 0.8975947499275208, | |
| "learning_rate": 4.4141708542713576e-06, | |
| "loss": 0.0005, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 28.27, | |
| "grad_norm": 1.2385715246200562, | |
| "learning_rate": 4.411658291457287e-06, | |
| "loss": 0.0005, | |
| "step": 56125 | |
| }, | |
| { | |
| "epoch": 28.29, | |
| "grad_norm": 0.19437459111213684, | |
| "learning_rate": 4.409145728643216e-06, | |
| "loss": 0.0006, | |
| "step": 56150 | |
| }, | |
| { | |
| "epoch": 28.3, | |
| "grad_norm": 1.1243386268615723, | |
| "learning_rate": 4.406633165829146e-06, | |
| "loss": 0.0006, | |
| "step": 56175 | |
| }, | |
| { | |
| "epoch": 28.31, | |
| "grad_norm": 0.16806860268115997, | |
| "learning_rate": 4.404120603015076e-06, | |
| "loss": 0.0005, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 28.32, | |
| "grad_norm": 0.5347501635551453, | |
| "learning_rate": 4.401608040201005e-06, | |
| "loss": 0.0003, | |
| "step": 56225 | |
| }, | |
| { | |
| "epoch": 28.34, | |
| "grad_norm": 0.3039199113845825, | |
| "learning_rate": 4.399095477386935e-06, | |
| "loss": 0.0004, | |
| "step": 56250 | |
| }, | |
| { | |
| "epoch": 28.35, | |
| "grad_norm": 0.7065151929855347, | |
| "learning_rate": 4.3965829145728645e-06, | |
| "loss": 0.0006, | |
| "step": 56275 | |
| }, | |
| { | |
| "epoch": 28.36, | |
| "grad_norm": 0.7291182279586792, | |
| "learning_rate": 4.394070351758795e-06, | |
| "loss": 0.0006, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 28.38, | |
| "grad_norm": 0.11983204632997513, | |
| "learning_rate": 4.391557788944724e-06, | |
| "loss": 0.0005, | |
| "step": 56325 | |
| }, | |
| { | |
| "epoch": 28.39, | |
| "grad_norm": 0.32822325825691223, | |
| "learning_rate": 4.389045226130654e-06, | |
| "loss": 0.0011, | |
| "step": 56350 | |
| }, | |
| { | |
| "epoch": 28.4, | |
| "grad_norm": 0.3108604848384857, | |
| "learning_rate": 4.3865326633165835e-06, | |
| "loss": 0.0005, | |
| "step": 56375 | |
| }, | |
| { | |
| "epoch": 28.41, | |
| "grad_norm": 0.2673742175102234, | |
| "learning_rate": 4.384020100502513e-06, | |
| "loss": 0.0004, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 28.43, | |
| "grad_norm": 0.8592258095741272, | |
| "learning_rate": 4.381507537688442e-06, | |
| "loss": 0.0004, | |
| "step": 56425 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "grad_norm": 0.1229337826371193, | |
| "learning_rate": 4.378994974874372e-06, | |
| "loss": 0.0004, | |
| "step": 56450 | |
| }, | |
| { | |
| "epoch": 28.45, | |
| "grad_norm": 0.470473051071167, | |
| "learning_rate": 4.376482412060302e-06, | |
| "loss": 0.0004, | |
| "step": 56475 | |
| }, | |
| { | |
| "epoch": 28.46, | |
| "grad_norm": 0.09908214956521988, | |
| "learning_rate": 4.373969849246231e-06, | |
| "loss": 0.0004, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 28.48, | |
| "grad_norm": 0.11872223764657974, | |
| "learning_rate": 4.371457286432161e-06, | |
| "loss": 0.0008, | |
| "step": 56525 | |
| }, | |
| { | |
| "epoch": 28.49, | |
| "grad_norm": 1.120409607887268, | |
| "learning_rate": 4.368944723618091e-06, | |
| "loss": 0.0006, | |
| "step": 56550 | |
| }, | |
| { | |
| "epoch": 28.5, | |
| "grad_norm": 0.6582888960838318, | |
| "learning_rate": 4.366432160804021e-06, | |
| "loss": 0.0008, | |
| "step": 56575 | |
| }, | |
| { | |
| "epoch": 28.51, | |
| "grad_norm": 0.6543013453483582, | |
| "learning_rate": 4.36391959798995e-06, | |
| "loss": 0.0007, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 28.53, | |
| "grad_norm": 0.4543153941631317, | |
| "learning_rate": 4.36140703517588e-06, | |
| "loss": 0.0011, | |
| "step": 56625 | |
| }, | |
| { | |
| "epoch": 28.54, | |
| "grad_norm": 0.8596717119216919, | |
| "learning_rate": 4.3588944723618094e-06, | |
| "loss": 0.0007, | |
| "step": 56650 | |
| }, | |
| { | |
| "epoch": 28.55, | |
| "grad_norm": 1.2596262693405151, | |
| "learning_rate": 4.356381909547739e-06, | |
| "loss": 0.0008, | |
| "step": 56675 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "grad_norm": 0.8046451807022095, | |
| "learning_rate": 4.353869346733668e-06, | |
| "loss": 0.0009, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 28.58, | |
| "grad_norm": 0.7610066533088684, | |
| "learning_rate": 4.351356783919598e-06, | |
| "loss": 0.0014, | |
| "step": 56725 | |
| }, | |
| { | |
| "epoch": 28.59, | |
| "grad_norm": 0.3740193247795105, | |
| "learning_rate": 4.3488442211055284e-06, | |
| "loss": 0.0008, | |
| "step": 56750 | |
| }, | |
| { | |
| "epoch": 28.6, | |
| "grad_norm": 0.3946913480758667, | |
| "learning_rate": 4.346331658291458e-06, | |
| "loss": 0.0013, | |
| "step": 56775 | |
| }, | |
| { | |
| "epoch": 28.61, | |
| "grad_norm": 1.1675328016281128, | |
| "learning_rate": 4.343819095477387e-06, | |
| "loss": 0.0011, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 28.63, | |
| "grad_norm": 0.23593860864639282, | |
| "learning_rate": 4.341306532663317e-06, | |
| "loss": 0.001, | |
| "step": 56825 | |
| }, | |
| { | |
| "epoch": 28.64, | |
| "grad_norm": 0.12008998543024063, | |
| "learning_rate": 4.3387939698492466e-06, | |
| "loss": 0.0008, | |
| "step": 56850 | |
| }, | |
| { | |
| "epoch": 28.65, | |
| "grad_norm": 0.1624538004398346, | |
| "learning_rate": 4.336281407035176e-06, | |
| "loss": 0.0007, | |
| "step": 56875 | |
| }, | |
| { | |
| "epoch": 28.66, | |
| "grad_norm": 0.2177920937538147, | |
| "learning_rate": 4.333768844221106e-06, | |
| "loss": 0.0007, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 28.68, | |
| "grad_norm": 0.23931287229061127, | |
| "learning_rate": 4.331256281407035e-06, | |
| "loss": 0.0005, | |
| "step": 56925 | |
| }, | |
| { | |
| "epoch": 28.69, | |
| "grad_norm": 0.3201751410961151, | |
| "learning_rate": 4.3287437185929656e-06, | |
| "loss": 0.0009, | |
| "step": 56950 | |
| }, | |
| { | |
| "epoch": 28.7, | |
| "grad_norm": 0.13656805455684662, | |
| "learning_rate": 4.326231155778895e-06, | |
| "loss": 0.0008, | |
| "step": 56975 | |
| }, | |
| { | |
| "epoch": 28.72, | |
| "grad_norm": 0.4347066581249237, | |
| "learning_rate": 4.323718592964824e-06, | |
| "loss": 0.0006, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 28.72, | |
| "eval_loss": 0.3676753342151642, | |
| "eval_runtime": 647.1207, | |
| "eval_samples_per_second": 2.177, | |
| "eval_steps_per_second": 2.177, | |
| "eval_wer": 22.73953649256313, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "grad_norm": 0.6821175217628479, | |
| "learning_rate": 4.321206030150754e-06, | |
| "loss": 0.0008, | |
| "step": 57025 | |
| }, | |
| { | |
| "epoch": 28.74, | |
| "grad_norm": 0.8675858974456787, | |
| "learning_rate": 4.318693467336684e-06, | |
| "loss": 0.0007, | |
| "step": 57050 | |
| }, | |
| { | |
| "epoch": 28.75, | |
| "grad_norm": 0.7905115485191345, | |
| "learning_rate": 4.316180904522613e-06, | |
| "loss": 0.0009, | |
| "step": 57075 | |
| }, | |
| { | |
| "epoch": 28.77, | |
| "grad_norm": 0.3380034565925598, | |
| "learning_rate": 4.313668341708543e-06, | |
| "loss": 0.0008, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 28.78, | |
| "grad_norm": 0.5678602457046509, | |
| "learning_rate": 4.3111557788944725e-06, | |
| "loss": 0.0007, | |
| "step": 57125 | |
| }, | |
| { | |
| "epoch": 28.79, | |
| "grad_norm": 0.4038754105567932, | |
| "learning_rate": 4.308643216080403e-06, | |
| "loss": 0.0006, | |
| "step": 57150 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "grad_norm": 0.8682851195335388, | |
| "learning_rate": 4.306130653266332e-06, | |
| "loss": 0.0005, | |
| "step": 57175 | |
| }, | |
| { | |
| "epoch": 28.82, | |
| "grad_norm": 0.2736469507217407, | |
| "learning_rate": 4.303618090452262e-06, | |
| "loss": 0.0008, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 28.83, | |
| "grad_norm": 0.056282587349414825, | |
| "learning_rate": 4.3011055276381915e-06, | |
| "loss": 0.0005, | |
| "step": 57225 | |
| }, | |
| { | |
| "epoch": 28.84, | |
| "grad_norm": 0.7956998944282532, | |
| "learning_rate": 4.298592964824121e-06, | |
| "loss": 0.0006, | |
| "step": 57250 | |
| }, | |
| { | |
| "epoch": 28.85, | |
| "grad_norm": 0.39681777358055115, | |
| "learning_rate": 4.29608040201005e-06, | |
| "loss": 0.0005, | |
| "step": 57275 | |
| }, | |
| { | |
| "epoch": 28.87, | |
| "grad_norm": 1.7592027187347412, | |
| "learning_rate": 4.29356783919598e-06, | |
| "loss": 0.0005, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 28.88, | |
| "grad_norm": 0.40772297978401184, | |
| "learning_rate": 4.29105527638191e-06, | |
| "loss": 0.0003, | |
| "step": 57325 | |
| }, | |
| { | |
| "epoch": 28.89, | |
| "grad_norm": 0.3130989074707031, | |
| "learning_rate": 4.28854271356784e-06, | |
| "loss": 0.0003, | |
| "step": 57350 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "grad_norm": 0.22975189983844757, | |
| "learning_rate": 4.286030150753769e-06, | |
| "loss": 0.0004, | |
| "step": 57375 | |
| }, | |
| { | |
| "epoch": 28.92, | |
| "grad_norm": 0.8328010439872742, | |
| "learning_rate": 4.283517587939699e-06, | |
| "loss": 0.0006, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 28.93, | |
| "grad_norm": 0.41630086302757263, | |
| "learning_rate": 4.281005025125629e-06, | |
| "loss": 0.0005, | |
| "step": 57425 | |
| }, | |
| { | |
| "epoch": 28.94, | |
| "grad_norm": 0.48607903718948364, | |
| "learning_rate": 4.278492462311558e-06, | |
| "loss": 0.0005, | |
| "step": 57450 | |
| }, | |
| { | |
| "epoch": 28.95, | |
| "grad_norm": 0.3274035155773163, | |
| "learning_rate": 4.275979899497488e-06, | |
| "loss": 0.0005, | |
| "step": 57475 | |
| }, | |
| { | |
| "epoch": 28.97, | |
| "grad_norm": 0.2861725687980652, | |
| "learning_rate": 4.2734673366834174e-06, | |
| "loss": 0.0006, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 28.98, | |
| "grad_norm": 0.6594395041465759, | |
| "learning_rate": 4.270954773869347e-06, | |
| "loss": 0.0005, | |
| "step": 57525 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "grad_norm": 0.2788364887237549, | |
| "learning_rate": 4.268442211055277e-06, | |
| "loss": 0.0003, | |
| "step": 57550 | |
| }, | |
| { | |
| "epoch": 29.01, | |
| "grad_norm": 0.4915536344051361, | |
| "learning_rate": 4.265929648241206e-06, | |
| "loss": 0.0004, | |
| "step": 57575 | |
| }, | |
| { | |
| "epoch": 29.02, | |
| "grad_norm": 0.14363212883472443, | |
| "learning_rate": 4.2634170854271364e-06, | |
| "loss": 0.0003, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 29.03, | |
| "grad_norm": 0.7467771172523499, | |
| "learning_rate": 4.260904522613066e-06, | |
| "loss": 0.0005, | |
| "step": 57625 | |
| }, | |
| { | |
| "epoch": 29.04, | |
| "grad_norm": 0.7711644172668457, | |
| "learning_rate": 4.258391959798995e-06, | |
| "loss": 0.0005, | |
| "step": 57650 | |
| }, | |
| { | |
| "epoch": 29.06, | |
| "grad_norm": 0.3670036494731903, | |
| "learning_rate": 4.255879396984925e-06, | |
| "loss": 0.0004, | |
| "step": 57675 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "grad_norm": 0.2002963125705719, | |
| "learning_rate": 4.2533668341708546e-06, | |
| "loss": 0.0005, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 29.08, | |
| "grad_norm": 0.13585589826107025, | |
| "learning_rate": 4.250854271356784e-06, | |
| "loss": 0.0003, | |
| "step": 57725 | |
| }, | |
| { | |
| "epoch": 29.09, | |
| "grad_norm": 0.4625987112522125, | |
| "learning_rate": 4.248341708542714e-06, | |
| "loss": 0.0003, | |
| "step": 57750 | |
| }, | |
| { | |
| "epoch": 29.11, | |
| "grad_norm": 0.43475794792175293, | |
| "learning_rate": 4.245829145728643e-06, | |
| "loss": 0.0003, | |
| "step": 57775 | |
| }, | |
| { | |
| "epoch": 29.12, | |
| "grad_norm": 0.46216335892677307, | |
| "learning_rate": 4.2433165829145736e-06, | |
| "loss": 0.0004, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 29.13, | |
| "grad_norm": 0.10933476686477661, | |
| "learning_rate": 4.240804020100503e-06, | |
| "loss": 0.0003, | |
| "step": 57825 | |
| }, | |
| { | |
| "epoch": 29.14, | |
| "grad_norm": 0.3376076817512512, | |
| "learning_rate": 4.238291457286432e-06, | |
| "loss": 0.0005, | |
| "step": 57850 | |
| }, | |
| { | |
| "epoch": 29.16, | |
| "grad_norm": 0.09248408675193787, | |
| "learning_rate": 4.235778894472362e-06, | |
| "loss": 0.0004, | |
| "step": 57875 | |
| }, | |
| { | |
| "epoch": 29.17, | |
| "grad_norm": 0.165025994181633, | |
| "learning_rate": 4.233266331658292e-06, | |
| "loss": 0.0004, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 29.18, | |
| "grad_norm": 0.20500481128692627, | |
| "learning_rate": 4.230753768844221e-06, | |
| "loss": 0.0002, | |
| "step": 57925 | |
| }, | |
| { | |
| "epoch": 29.19, | |
| "grad_norm": 1.7176584005355835, | |
| "learning_rate": 4.228241206030151e-06, | |
| "loss": 0.0003, | |
| "step": 57950 | |
| }, | |
| { | |
| "epoch": 29.21, | |
| "grad_norm": 0.04315100982785225, | |
| "learning_rate": 4.225829145728644e-06, | |
| "loss": 0.0004, | |
| "step": 57975 | |
| }, | |
| { | |
| "epoch": 29.22, | |
| "grad_norm": 0.7562754154205322, | |
| "learning_rate": 4.223316582914574e-06, | |
| "loss": 0.0005, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 29.22, | |
| "eval_loss": 0.36634162068367004, | |
| "eval_runtime": 646.3446, | |
| "eval_samples_per_second": 2.18, | |
| "eval_steps_per_second": 2.18, | |
| "eval_wer": 22.877896921480456, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 29.23, | |
| "grad_norm": 0.18568870425224304, | |
| "learning_rate": 4.220804020100503e-06, | |
| "loss": 0.0003, | |
| "step": 58025 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "grad_norm": 0.02903875522315502, | |
| "learning_rate": 4.218291457286432e-06, | |
| "loss": 0.0002, | |
| "step": 58050 | |
| }, | |
| { | |
| "epoch": 29.26, | |
| "grad_norm": 0.5034275054931641, | |
| "learning_rate": 4.215778894472362e-06, | |
| "loss": 0.0003, | |
| "step": 58075 | |
| }, | |
| { | |
| "epoch": 29.27, | |
| "grad_norm": 0.24400202929973602, | |
| "learning_rate": 4.213266331658292e-06, | |
| "loss": 0.0004, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 29.28, | |
| "grad_norm": 0.43526986241340637, | |
| "learning_rate": 4.210753768844221e-06, | |
| "loss": 0.0004, | |
| "step": 58125 | |
| }, | |
| { | |
| "epoch": 29.29, | |
| "grad_norm": 0.8774734735488892, | |
| "learning_rate": 4.2082412060301505e-06, | |
| "loss": 0.0003, | |
| "step": 58150 | |
| }, | |
| { | |
| "epoch": 29.31, | |
| "grad_norm": 0.2758621275424957, | |
| "learning_rate": 4.205728643216081e-06, | |
| "loss": 0.0004, | |
| "step": 58175 | |
| }, | |
| { | |
| "epoch": 29.32, | |
| "grad_norm": 0.09988962113857269, | |
| "learning_rate": 4.203216080402011e-06, | |
| "loss": 0.0002, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 29.33, | |
| "grad_norm": 0.04445599764585495, | |
| "learning_rate": 4.20070351758794e-06, | |
| "loss": 0.0002, | |
| "step": 58225 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "grad_norm": 0.6248559355735779, | |
| "learning_rate": 4.1981909547738695e-06, | |
| "loss": 0.0002, | |
| "step": 58250 | |
| }, | |
| { | |
| "epoch": 29.36, | |
| "grad_norm": 0.5372090339660645, | |
| "learning_rate": 4.1956783919598e-06, | |
| "loss": 0.0003, | |
| "step": 58275 | |
| }, | |
| { | |
| "epoch": 29.37, | |
| "grad_norm": 0.05950794741511345, | |
| "learning_rate": 4.193165829145729e-06, | |
| "loss": 0.0003, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 29.38, | |
| "grad_norm": 0.2952395975589752, | |
| "learning_rate": 4.190653266331658e-06, | |
| "loss": 0.0005, | |
| "step": 58325 | |
| }, | |
| { | |
| "epoch": 29.4, | |
| "grad_norm": 1.0344479084014893, | |
| "learning_rate": 4.188140703517588e-06, | |
| "loss": 0.0005, | |
| "step": 58350 | |
| }, | |
| { | |
| "epoch": 29.41, | |
| "grad_norm": 0.07533106207847595, | |
| "learning_rate": 4.185628140703518e-06, | |
| "loss": 0.0008, | |
| "step": 58375 | |
| }, | |
| { | |
| "epoch": 29.42, | |
| "grad_norm": 0.19977255165576935, | |
| "learning_rate": 4.183115577889448e-06, | |
| "loss": 0.0005, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 29.43, | |
| "grad_norm": 0.21329541504383087, | |
| "learning_rate": 4.180603015075377e-06, | |
| "loss": 0.0003, | |
| "step": 58425 | |
| }, | |
| { | |
| "epoch": 29.45, | |
| "grad_norm": 0.2345517873764038, | |
| "learning_rate": 4.178090452261307e-06, | |
| "loss": 0.0006, | |
| "step": 58450 | |
| }, | |
| { | |
| "epoch": 29.46, | |
| "grad_norm": 0.028459738940000534, | |
| "learning_rate": 4.175577889447237e-06, | |
| "loss": 0.0004, | |
| "step": 58475 | |
| }, | |
| { | |
| "epoch": 29.47, | |
| "grad_norm": 0.8179320693016052, | |
| "learning_rate": 4.173065326633166e-06, | |
| "loss": 0.0005, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 29.48, | |
| "grad_norm": 0.8002499341964722, | |
| "learning_rate": 4.1705527638190955e-06, | |
| "loss": 0.0004, | |
| "step": 58525 | |
| }, | |
| { | |
| "epoch": 29.5, | |
| "grad_norm": 0.18765227496623993, | |
| "learning_rate": 4.168040201005026e-06, | |
| "loss": 0.0004, | |
| "step": 58550 | |
| }, | |
| { | |
| "epoch": 29.51, | |
| "grad_norm": 0.20206257700920105, | |
| "learning_rate": 4.165527638190955e-06, | |
| "loss": 0.0008, | |
| "step": 58575 | |
| }, | |
| { | |
| "epoch": 29.52, | |
| "grad_norm": 0.7327660322189331, | |
| "learning_rate": 4.163015075376885e-06, | |
| "loss": 0.0005, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 29.53, | |
| "grad_norm": 1.6903936862945557, | |
| "learning_rate": 4.1605025125628145e-06, | |
| "loss": 0.0007, | |
| "step": 58625 | |
| }, | |
| { | |
| "epoch": 29.55, | |
| "grad_norm": 0.523650050163269, | |
| "learning_rate": 4.157989949748744e-06, | |
| "loss": 0.0004, | |
| "step": 58650 | |
| }, | |
| { | |
| "epoch": 29.56, | |
| "grad_norm": 0.5737274289131165, | |
| "learning_rate": 4.155477386934674e-06, | |
| "loss": 0.0006, | |
| "step": 58675 | |
| }, | |
| { | |
| "epoch": 29.57, | |
| "grad_norm": 0.43866389989852905, | |
| "learning_rate": 4.152964824120603e-06, | |
| "loss": 0.0005, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "grad_norm": 0.3836389183998108, | |
| "learning_rate": 4.150452261306533e-06, | |
| "loss": 0.0006, | |
| "step": 58725 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "grad_norm": 0.2658005654811859, | |
| "learning_rate": 4.147939698492463e-06, | |
| "loss": 0.0005, | |
| "step": 58750 | |
| }, | |
| { | |
| "epoch": 29.61, | |
| "grad_norm": 0.9957432150840759, | |
| "learning_rate": 4.145427135678392e-06, | |
| "loss": 0.0006, | |
| "step": 58775 | |
| }, | |
| { | |
| "epoch": 29.62, | |
| "grad_norm": 0.4388526380062103, | |
| "learning_rate": 4.142914572864322e-06, | |
| "loss": 0.0005, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 29.63, | |
| "grad_norm": 0.48335981369018555, | |
| "learning_rate": 4.140402010050252e-06, | |
| "loss": 0.0008, | |
| "step": 58825 | |
| }, | |
| { | |
| "epoch": 29.65, | |
| "grad_norm": 0.6199666261672974, | |
| "learning_rate": 4.137889447236182e-06, | |
| "loss": 0.0006, | |
| "step": 58850 | |
| }, | |
| { | |
| "epoch": 29.66, | |
| "grad_norm": 0.406076043844223, | |
| "learning_rate": 4.135376884422111e-06, | |
| "loss": 0.0006, | |
| "step": 58875 | |
| }, | |
| { | |
| "epoch": 29.67, | |
| "grad_norm": 0.1077524796128273, | |
| "learning_rate": 4.13286432160804e-06, | |
| "loss": 0.0005, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 29.69, | |
| "grad_norm": 0.3811541795730591, | |
| "learning_rate": 4.13035175879397e-06, | |
| "loss": 0.0007, | |
| "step": 58925 | |
| }, | |
| { | |
| "epoch": 29.7, | |
| "grad_norm": 1.440758228302002, | |
| "learning_rate": 4.1278391959799e-06, | |
| "loss": 0.0006, | |
| "step": 58950 | |
| }, | |
| { | |
| "epoch": 29.71, | |
| "grad_norm": 0.47515958547592163, | |
| "learning_rate": 4.125326633165829e-06, | |
| "loss": 0.0004, | |
| "step": 58975 | |
| }, | |
| { | |
| "epoch": 29.72, | |
| "grad_norm": 0.956475019454956, | |
| "learning_rate": 4.122814070351759e-06, | |
| "loss": 0.0006, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 29.72, | |
| "eval_loss": 0.37018460035324097, | |
| "eval_runtime": 650.5122, | |
| "eval_samples_per_second": 2.166, | |
| "eval_steps_per_second": 2.166, | |
| "eval_wer": 23.348322379799377, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 29.74, | |
| "grad_norm": 1.6364458799362183, | |
| "learning_rate": 4.120301507537689e-06, | |
| "loss": 0.0008, | |
| "step": 59025 | |
| }, | |
| { | |
| "epoch": 29.75, | |
| "grad_norm": 0.5471516251564026, | |
| "learning_rate": 4.117788944723619e-06, | |
| "loss": 0.0007, | |
| "step": 59050 | |
| }, | |
| { | |
| "epoch": 29.76, | |
| "grad_norm": 0.9236280918121338, | |
| "learning_rate": 4.115276381909548e-06, | |
| "loss": 0.0007, | |
| "step": 59075 | |
| }, | |
| { | |
| "epoch": 29.77, | |
| "grad_norm": 0.1981869339942932, | |
| "learning_rate": 4.1127638190954775e-06, | |
| "loss": 0.0007, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 29.79, | |
| "grad_norm": 0.7604771852493286, | |
| "learning_rate": 4.110251256281408e-06, | |
| "loss": 0.0004, | |
| "step": 59125 | |
| }, | |
| { | |
| "epoch": 29.8, | |
| "grad_norm": 0.5981962084770203, | |
| "learning_rate": 4.107738693467337e-06, | |
| "loss": 0.0007, | |
| "step": 59150 | |
| }, | |
| { | |
| "epoch": 29.81, | |
| "grad_norm": 0.9889633655548096, | |
| "learning_rate": 4.105226130653266e-06, | |
| "loss": 0.0004, | |
| "step": 59175 | |
| }, | |
| { | |
| "epoch": 29.82, | |
| "grad_norm": 0.33064815402030945, | |
| "learning_rate": 4.102713567839196e-06, | |
| "loss": 0.0007, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 29.84, | |
| "grad_norm": 0.3253353238105774, | |
| "learning_rate": 4.100201005025126e-06, | |
| "loss": 0.0005, | |
| "step": 59225 | |
| }, | |
| { | |
| "epoch": 29.85, | |
| "grad_norm": 1.7824808359146118, | |
| "learning_rate": 4.097688442211056e-06, | |
| "loss": 0.0006, | |
| "step": 59250 | |
| }, | |
| { | |
| "epoch": 29.86, | |
| "grad_norm": 0.1777506172657013, | |
| "learning_rate": 4.095175879396985e-06, | |
| "loss": 0.0004, | |
| "step": 59275 | |
| }, | |
| { | |
| "epoch": 29.87, | |
| "grad_norm": 0.09130828827619553, | |
| "learning_rate": 4.092663316582915e-06, | |
| "loss": 0.0005, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 29.89, | |
| "grad_norm": 0.26124000549316406, | |
| "learning_rate": 4.090150753768845e-06, | |
| "loss": 0.0004, | |
| "step": 59325 | |
| }, | |
| { | |
| "epoch": 29.9, | |
| "grad_norm": 0.2676754295825958, | |
| "learning_rate": 4.087638190954774e-06, | |
| "loss": 0.0004, | |
| "step": 59350 | |
| }, | |
| { | |
| "epoch": 29.91, | |
| "grad_norm": 0.7923325896263123, | |
| "learning_rate": 4.0851256281407035e-06, | |
| "loss": 0.0004, | |
| "step": 59375 | |
| }, | |
| { | |
| "epoch": 29.92, | |
| "grad_norm": 0.06875083595514297, | |
| "learning_rate": 4.082613065326634e-06, | |
| "loss": 0.0007, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 29.94, | |
| "grad_norm": 1.1524797677993774, | |
| "learning_rate": 4.080100502512564e-06, | |
| "loss": 0.0006, | |
| "step": 59425 | |
| }, | |
| { | |
| "epoch": 29.95, | |
| "grad_norm": 1.1805469989776611, | |
| "learning_rate": 4.077587939698493e-06, | |
| "loss": 0.0006, | |
| "step": 59450 | |
| }, | |
| { | |
| "epoch": 29.96, | |
| "grad_norm": 0.523131251335144, | |
| "learning_rate": 4.0750753768844225e-06, | |
| "loss": 0.0005, | |
| "step": 59475 | |
| }, | |
| { | |
| "epoch": 29.97, | |
| "grad_norm": 0.9448018074035645, | |
| "learning_rate": 4.072562814070352e-06, | |
| "loss": 0.0004, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "grad_norm": 1.3744333982467651, | |
| "learning_rate": 4.070050251256282e-06, | |
| "loss": 0.0007, | |
| "step": 59525 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.2589998245239258, | |
| "learning_rate": 4.067537688442211e-06, | |
| "loss": 0.0006, | |
| "step": 59550 | |
| }, | |
| { | |
| "epoch": 30.01, | |
| "grad_norm": 0.44579869508743286, | |
| "learning_rate": 4.065025125628141e-06, | |
| "loss": 0.0004, | |
| "step": 59575 | |
| }, | |
| { | |
| "epoch": 30.03, | |
| "grad_norm": 0.35231125354766846, | |
| "learning_rate": 4.062512562814071e-06, | |
| "loss": 0.0003, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 30.04, | |
| "grad_norm": 1.131148099899292, | |
| "learning_rate": 4.060000000000001e-06, | |
| "loss": 0.0005, | |
| "step": 59625 | |
| }, | |
| { | |
| "epoch": 30.05, | |
| "grad_norm": 0.154410719871521, | |
| "learning_rate": 4.05748743718593e-06, | |
| "loss": 0.0005, | |
| "step": 59650 | |
| }, | |
| { | |
| "epoch": 30.06, | |
| "grad_norm": 0.5527713894844055, | |
| "learning_rate": 4.05497487437186e-06, | |
| "loss": 0.0006, | |
| "step": 59675 | |
| }, | |
| { | |
| "epoch": 30.08, | |
| "grad_norm": 0.12832514941692352, | |
| "learning_rate": 4.05246231155779e-06, | |
| "loss": 0.0003, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 30.09, | |
| "grad_norm": 0.059667546302080154, | |
| "learning_rate": 4.049949748743719e-06, | |
| "loss": 0.0006, | |
| "step": 59725 | |
| }, | |
| { | |
| "epoch": 30.1, | |
| "grad_norm": 0.9049032330513, | |
| "learning_rate": 4.047437185929648e-06, | |
| "loss": 0.0004, | |
| "step": 59750 | |
| }, | |
| { | |
| "epoch": 30.11, | |
| "grad_norm": 0.42407527565956116, | |
| "learning_rate": 4.044924623115578e-06, | |
| "loss": 0.0005, | |
| "step": 59775 | |
| }, | |
| { | |
| "epoch": 30.13, | |
| "grad_norm": 0.2845335304737091, | |
| "learning_rate": 4.042412060301508e-06, | |
| "loss": 0.0002, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 30.14, | |
| "grad_norm": 0.2934589087963104, | |
| "learning_rate": 4.039899497487437e-06, | |
| "loss": 0.0003, | |
| "step": 59825 | |
| }, | |
| { | |
| "epoch": 30.15, | |
| "grad_norm": 0.12243688106536865, | |
| "learning_rate": 4.037386934673367e-06, | |
| "loss": 0.0002, | |
| "step": 59850 | |
| }, | |
| { | |
| "epoch": 30.16, | |
| "grad_norm": 0.03510167449712753, | |
| "learning_rate": 4.034874371859297e-06, | |
| "loss": 0.0002, | |
| "step": 59875 | |
| }, | |
| { | |
| "epoch": 30.18, | |
| "grad_norm": 0.10497930645942688, | |
| "learning_rate": 4.032361809045227e-06, | |
| "loss": 0.0004, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 30.19, | |
| "grad_norm": 1.603574514389038, | |
| "learning_rate": 4.029849246231156e-06, | |
| "loss": 0.0003, | |
| "step": 59925 | |
| }, | |
| { | |
| "epoch": 30.2, | |
| "grad_norm": 0.07143130153417587, | |
| "learning_rate": 4.0273366834170855e-06, | |
| "loss": 0.0003, | |
| "step": 59950 | |
| }, | |
| { | |
| "epoch": 30.21, | |
| "grad_norm": 0.9120334386825562, | |
| "learning_rate": 4.024824120603016e-06, | |
| "loss": 0.0004, | |
| "step": 59975 | |
| }, | |
| { | |
| "epoch": 30.23, | |
| "grad_norm": 0.6783850193023682, | |
| "learning_rate": 4.022311557788945e-06, | |
| "loss": 0.0003, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 30.23, | |
| "eval_loss": 0.3732685446739197, | |
| "eval_runtime": 646.3241, | |
| "eval_samples_per_second": 2.18, | |
| "eval_steps_per_second": 2.18, | |
| "eval_wer": 22.490487720511933, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 30.24, | |
| "grad_norm": 0.0949537456035614, | |
| "learning_rate": 4.019798994974874e-06, | |
| "loss": 0.0003, | |
| "step": 60025 | |
| }, | |
| { | |
| "epoch": 30.25, | |
| "grad_norm": 0.08624821156263351, | |
| "learning_rate": 4.0172864321608045e-06, | |
| "loss": 0.0003, | |
| "step": 60050 | |
| }, | |
| { | |
| "epoch": 30.26, | |
| "grad_norm": 0.613463819026947, | |
| "learning_rate": 4.014874371859297e-06, | |
| "loss": 0.0004, | |
| "step": 60075 | |
| }, | |
| { | |
| "epoch": 30.28, | |
| "grad_norm": 1.7669380903244019, | |
| "learning_rate": 4.012361809045226e-06, | |
| "loss": 0.0004, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 30.29, | |
| "grad_norm": 0.22005651891231537, | |
| "learning_rate": 4.009849246231156e-06, | |
| "loss": 0.0005, | |
| "step": 60125 | |
| }, | |
| { | |
| "epoch": 30.3, | |
| "grad_norm": 0.448355108499527, | |
| "learning_rate": 4.007336683417086e-06, | |
| "loss": 0.0004, | |
| "step": 60150 | |
| }, | |
| { | |
| "epoch": 30.31, | |
| "grad_norm": 0.3999320864677429, | |
| "learning_rate": 4.004824120603015e-06, | |
| "loss": 0.0004, | |
| "step": 60175 | |
| }, | |
| { | |
| "epoch": 30.33, | |
| "grad_norm": 0.5650457143783569, | |
| "learning_rate": 4.002311557788945e-06, | |
| "loss": 0.0003, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 30.34, | |
| "grad_norm": 0.3573535084724426, | |
| "learning_rate": 3.9997989949748745e-06, | |
| "loss": 0.0003, | |
| "step": 60225 | |
| }, | |
| { | |
| "epoch": 30.35, | |
| "grad_norm": 0.04291848465800285, | |
| "learning_rate": 3.997286432160805e-06, | |
| "loss": 0.0004, | |
| "step": 60250 | |
| }, | |
| { | |
| "epoch": 30.37, | |
| "grad_norm": 0.6211608052253723, | |
| "learning_rate": 3.994773869346734e-06, | |
| "loss": 0.0003, | |
| "step": 60275 | |
| }, | |
| { | |
| "epoch": 30.38, | |
| "grad_norm": 0.09989487379789352, | |
| "learning_rate": 3.992261306532663e-06, | |
| "loss": 0.0003, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 30.39, | |
| "grad_norm": 0.6828473210334778, | |
| "learning_rate": 3.9897487437185935e-06, | |
| "loss": 0.0005, | |
| "step": 60325 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "grad_norm": 0.4750407338142395, | |
| "learning_rate": 3.987236180904523e-06, | |
| "loss": 0.0008, | |
| "step": 60350 | |
| }, | |
| { | |
| "epoch": 30.42, | |
| "grad_norm": 0.12395069003105164, | |
| "learning_rate": 3.984723618090452e-06, | |
| "loss": 0.0006, | |
| "step": 60375 | |
| }, | |
| { | |
| "epoch": 30.43, | |
| "grad_norm": 0.19850021600723267, | |
| "learning_rate": 3.982211055276382e-06, | |
| "loss": 0.0007, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 30.44, | |
| "grad_norm": 1.2987253665924072, | |
| "learning_rate": 3.979698492462312e-06, | |
| "loss": 0.0005, | |
| "step": 60425 | |
| }, | |
| { | |
| "epoch": 30.45, | |
| "grad_norm": 0.12956413626670837, | |
| "learning_rate": 3.977185929648242e-06, | |
| "loss": 0.0005, | |
| "step": 60450 | |
| }, | |
| { | |
| "epoch": 30.47, | |
| "grad_norm": 0.8285446166992188, | |
| "learning_rate": 3.974673366834171e-06, | |
| "loss": 0.0004, | |
| "step": 60475 | |
| }, | |
| { | |
| "epoch": 30.48, | |
| "grad_norm": 0.9099289774894714, | |
| "learning_rate": 3.972160804020101e-06, | |
| "loss": 0.0003, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 30.49, | |
| "grad_norm": 1.001538634300232, | |
| "learning_rate": 3.969648241206031e-06, | |
| "loss": 0.0004, | |
| "step": 60525 | |
| }, | |
| { | |
| "epoch": 30.5, | |
| "grad_norm": 0.4828273057937622, | |
| "learning_rate": 3.96713567839196e-06, | |
| "loss": 0.0003, | |
| "step": 60550 | |
| }, | |
| { | |
| "epoch": 30.52, | |
| "grad_norm": 1.4622454643249512, | |
| "learning_rate": 3.964623115577889e-06, | |
| "loss": 0.0005, | |
| "step": 60575 | |
| }, | |
| { | |
| "epoch": 30.53, | |
| "grad_norm": 0.2010238915681839, | |
| "learning_rate": 3.9621105527638195e-06, | |
| "loss": 0.0004, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 30.54, | |
| "grad_norm": 0.11599469929933548, | |
| "learning_rate": 3.959597989949749e-06, | |
| "loss": 0.0007, | |
| "step": 60625 | |
| }, | |
| { | |
| "epoch": 30.55, | |
| "grad_norm": 0.18616123497486115, | |
| "learning_rate": 3.957085427135678e-06, | |
| "loss": 0.0004, | |
| "step": 60650 | |
| }, | |
| { | |
| "epoch": 30.57, | |
| "grad_norm": 0.4725811779499054, | |
| "learning_rate": 3.954572864321608e-06, | |
| "loss": 0.0005, | |
| "step": 60675 | |
| }, | |
| { | |
| "epoch": 30.58, | |
| "grad_norm": 1.0674340724945068, | |
| "learning_rate": 3.9520603015075385e-06, | |
| "loss": 0.0006, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 30.59, | |
| "grad_norm": 0.5848013758659363, | |
| "learning_rate": 3.949547738693468e-06, | |
| "loss": 0.0004, | |
| "step": 60725 | |
| }, | |
| { | |
| "epoch": 30.6, | |
| "grad_norm": 0.26734229922294617, | |
| "learning_rate": 3.947035175879397e-06, | |
| "loss": 0.0006, | |
| "step": 60750 | |
| }, | |
| { | |
| "epoch": 30.62, | |
| "grad_norm": 0.08038675040006638, | |
| "learning_rate": 3.944522613065327e-06, | |
| "loss": 0.0006, | |
| "step": 60775 | |
| }, | |
| { | |
| "epoch": 30.63, | |
| "grad_norm": 0.4599511921405792, | |
| "learning_rate": 3.942010050251257e-06, | |
| "loss": 0.0005, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 30.64, | |
| "grad_norm": 1.278779149055481, | |
| "learning_rate": 3.939497487437186e-06, | |
| "loss": 0.0005, | |
| "step": 60825 | |
| }, | |
| { | |
| "epoch": 30.65, | |
| "grad_norm": 2.023319959640503, | |
| "learning_rate": 3.936984924623115e-06, | |
| "loss": 0.0006, | |
| "step": 60850 | |
| }, | |
| { | |
| "epoch": 30.67, | |
| "grad_norm": 0.48264050483703613, | |
| "learning_rate": 3.934472361809045e-06, | |
| "loss": 0.0004, | |
| "step": 60875 | |
| }, | |
| { | |
| "epoch": 30.68, | |
| "grad_norm": 0.313340961933136, | |
| "learning_rate": 3.931959798994976e-06, | |
| "loss": 0.0006, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 30.69, | |
| "grad_norm": 0.23155102133750916, | |
| "learning_rate": 3.929447236180905e-06, | |
| "loss": 0.0005, | |
| "step": 60925 | |
| }, | |
| { | |
| "epoch": 30.71, | |
| "grad_norm": 0.7064458727836609, | |
| "learning_rate": 3.926934673366834e-06, | |
| "loss": 0.0006, | |
| "step": 60950 | |
| }, | |
| { | |
| "epoch": 30.72, | |
| "grad_norm": 0.0884753167629242, | |
| "learning_rate": 3.924422110552764e-06, | |
| "loss": 0.0004, | |
| "step": 60975 | |
| }, | |
| { | |
| "epoch": 30.73, | |
| "grad_norm": 0.06730210036039352, | |
| "learning_rate": 3.921909547738694e-06, | |
| "loss": 0.0005, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 30.73, | |
| "eval_loss": 0.37581801414489746, | |
| "eval_runtime": 645.7901, | |
| "eval_samples_per_second": 2.182, | |
| "eval_steps_per_second": 2.182, | |
| "eval_wer": 22.80871670702179, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 30.74, | |
| "grad_norm": 0.38679373264312744, | |
| "learning_rate": 3.919396984924623e-06, | |
| "loss": 0.0006, | |
| "step": 61025 | |
| }, | |
| { | |
| "epoch": 30.76, | |
| "grad_norm": 1.362016201019287, | |
| "learning_rate": 3.916884422110553e-06, | |
| "loss": 0.0005, | |
| "step": 61050 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "grad_norm": 0.26858440041542053, | |
| "learning_rate": 3.914371859296483e-06, | |
| "loss": 0.0005, | |
| "step": 61075 | |
| }, | |
| { | |
| "epoch": 30.78, | |
| "grad_norm": 0.19505997002124786, | |
| "learning_rate": 3.911859296482413e-06, | |
| "loss": 0.0003, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 30.79, | |
| "grad_norm": 0.0629916712641716, | |
| "learning_rate": 3.909346733668342e-06, | |
| "loss": 0.0006, | |
| "step": 61125 | |
| }, | |
| { | |
| "epoch": 30.81, | |
| "grad_norm": 0.16181036829948425, | |
| "learning_rate": 3.906834170854271e-06, | |
| "loss": 0.0004, | |
| "step": 61150 | |
| }, | |
| { | |
| "epoch": 30.82, | |
| "grad_norm": 0.8519290685653687, | |
| "learning_rate": 3.9043216080402015e-06, | |
| "loss": 0.0004, | |
| "step": 61175 | |
| }, | |
| { | |
| "epoch": 30.83, | |
| "grad_norm": 0.3916480541229248, | |
| "learning_rate": 3.901809045226131e-06, | |
| "loss": 0.0003, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 30.84, | |
| "grad_norm": 0.1823578029870987, | |
| "learning_rate": 3.89929648241206e-06, | |
| "loss": 0.0005, | |
| "step": 61225 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "grad_norm": 0.525839626789093, | |
| "learning_rate": 3.89678391959799e-06, | |
| "loss": 0.0006, | |
| "step": 61250 | |
| }, | |
| { | |
| "epoch": 30.87, | |
| "grad_norm": 0.6914676427841187, | |
| "learning_rate": 3.89427135678392e-06, | |
| "loss": 0.0003, | |
| "step": 61275 | |
| }, | |
| { | |
| "epoch": 30.88, | |
| "grad_norm": 1.1129640340805054, | |
| "learning_rate": 3.89175879396985e-06, | |
| "loss": 0.0005, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 30.89, | |
| "grad_norm": 0.04879957437515259, | |
| "learning_rate": 3.889246231155779e-06, | |
| "loss": 0.0006, | |
| "step": 61325 | |
| }, | |
| { | |
| "epoch": 30.91, | |
| "grad_norm": 1.1155563592910767, | |
| "learning_rate": 3.886733668341709e-06, | |
| "loss": 0.0006, | |
| "step": 61350 | |
| }, | |
| { | |
| "epoch": 30.92, | |
| "grad_norm": 0.35383549332618713, | |
| "learning_rate": 3.884221105527639e-06, | |
| "loss": 0.0004, | |
| "step": 61375 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "grad_norm": 0.38548916578292847, | |
| "learning_rate": 3.881708542713568e-06, | |
| "loss": 0.0004, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 30.94, | |
| "grad_norm": 0.1265828013420105, | |
| "learning_rate": 3.879195979899497e-06, | |
| "loss": 0.0004, | |
| "step": 61425 | |
| }, | |
| { | |
| "epoch": 30.96, | |
| "grad_norm": 0.2077447772026062, | |
| "learning_rate": 3.8766834170854275e-06, | |
| "loss": 0.0004, | |
| "step": 61450 | |
| }, | |
| { | |
| "epoch": 30.97, | |
| "grad_norm": 0.43719515204429626, | |
| "learning_rate": 3.874170854271357e-06, | |
| "loss": 0.0004, | |
| "step": 61475 | |
| }, | |
| { | |
| "epoch": 30.98, | |
| "grad_norm": 0.25596338510513306, | |
| "learning_rate": 3.871658291457287e-06, | |
| "loss": 0.0004, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "grad_norm": 0.24157127737998962, | |
| "learning_rate": 3.869145728643216e-06, | |
| "loss": 0.0005, | |
| "step": 61525 | |
| }, | |
| { | |
| "epoch": 31.01, | |
| "grad_norm": 0.2741214334964752, | |
| "learning_rate": 3.8666331658291465e-06, | |
| "loss": 0.0006, | |
| "step": 61550 | |
| }, | |
| { | |
| "epoch": 31.02, | |
| "grad_norm": 0.15417811274528503, | |
| "learning_rate": 3.864120603015076e-06, | |
| "loss": 0.0004, | |
| "step": 61575 | |
| }, | |
| { | |
| "epoch": 31.03, | |
| "grad_norm": 0.18006564676761627, | |
| "learning_rate": 3.861608040201005e-06, | |
| "loss": 0.0004, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 31.05, | |
| "grad_norm": 0.6540391445159912, | |
| "learning_rate": 3.859095477386935e-06, | |
| "loss": 0.0004, | |
| "step": 61625 | |
| }, | |
| { | |
| "epoch": 31.06, | |
| "grad_norm": 0.21219852566719055, | |
| "learning_rate": 3.856683417085428e-06, | |
| "loss": 0.0004, | |
| "step": 61650 | |
| }, | |
| { | |
| "epoch": 31.07, | |
| "grad_norm": 0.12796539068222046, | |
| "learning_rate": 3.854170854271357e-06, | |
| "loss": 0.0002, | |
| "step": 61675 | |
| }, | |
| { | |
| "epoch": 31.08, | |
| "grad_norm": 0.03786884620785713, | |
| "learning_rate": 3.851658291457287e-06, | |
| "loss": 0.0002, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 31.1, | |
| "grad_norm": 0.10505225509405136, | |
| "learning_rate": 3.8491457286432165e-06, | |
| "loss": 0.0002, | |
| "step": 61725 | |
| }, | |
| { | |
| "epoch": 31.11, | |
| "grad_norm": 0.2574862837791443, | |
| "learning_rate": 3.846633165829146e-06, | |
| "loss": 0.0002, | |
| "step": 61750 | |
| }, | |
| { | |
| "epoch": 31.12, | |
| "grad_norm": 0.09054882079362869, | |
| "learning_rate": 3.844120603015076e-06, | |
| "loss": 0.0002, | |
| "step": 61775 | |
| }, | |
| { | |
| "epoch": 31.13, | |
| "grad_norm": 0.25638747215270996, | |
| "learning_rate": 3.841608040201005e-06, | |
| "loss": 0.0004, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 31.15, | |
| "grad_norm": 0.5020123720169067, | |
| "learning_rate": 3.839095477386935e-06, | |
| "loss": 0.0004, | |
| "step": 61825 | |
| }, | |
| { | |
| "epoch": 31.16, | |
| "grad_norm": 0.1703236848115921, | |
| "learning_rate": 3.836582914572865e-06, | |
| "loss": 0.0003, | |
| "step": 61850 | |
| }, | |
| { | |
| "epoch": 31.17, | |
| "grad_norm": 0.22640874981880188, | |
| "learning_rate": 3.834070351758794e-06, | |
| "loss": 0.0003, | |
| "step": 61875 | |
| }, | |
| { | |
| "epoch": 31.18, | |
| "grad_norm": 0.21768644452095032, | |
| "learning_rate": 3.831557788944724e-06, | |
| "loss": 0.0002, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 31.2, | |
| "grad_norm": 0.32308635115623474, | |
| "learning_rate": 3.829045226130654e-06, | |
| "loss": 0.0003, | |
| "step": 61925 | |
| }, | |
| { | |
| "epoch": 31.21, | |
| "grad_norm": 0.20229199528694153, | |
| "learning_rate": 3.826532663316583e-06, | |
| "loss": 0.0004, | |
| "step": 61950 | |
| }, | |
| { | |
| "epoch": 31.22, | |
| "grad_norm": 0.10681883990764618, | |
| "learning_rate": 3.824020100502513e-06, | |
| "loss": 0.0005, | |
| "step": 61975 | |
| }, | |
| { | |
| "epoch": 31.23, | |
| "grad_norm": 0.28597140312194824, | |
| "learning_rate": 3.8215075376884424e-06, | |
| "loss": 0.0002, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 31.23, | |
| "eval_loss": 0.37976065278053284, | |
| "eval_runtime": 653.558, | |
| "eval_samples_per_second": 2.156, | |
| "eval_steps_per_second": 2.156, | |
| "eval_wer": 23.037011414735385, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "grad_norm": 0.4050130248069763, | |
| "learning_rate": 3.818994974874372e-06, | |
| "loss": 0.0002, | |
| "step": 62025 | |
| }, | |
| { | |
| "epoch": 31.26, | |
| "grad_norm": 0.29295334219932556, | |
| "learning_rate": 3.816482412060302e-06, | |
| "loss": 0.0002, | |
| "step": 62050 | |
| }, | |
| { | |
| "epoch": 31.27, | |
| "grad_norm": 0.05594494193792343, | |
| "learning_rate": 3.8139698492462312e-06, | |
| "loss": 0.0002, | |
| "step": 62075 | |
| }, | |
| { | |
| "epoch": 31.28, | |
| "grad_norm": 0.06369101256132126, | |
| "learning_rate": 3.811457286432161e-06, | |
| "loss": 0.0002, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 31.3, | |
| "grad_norm": 0.04477281868457794, | |
| "learning_rate": 3.808944723618091e-06, | |
| "loss": 0.0001, | |
| "step": 62125 | |
| }, | |
| { | |
| "epoch": 31.31, | |
| "grad_norm": 0.09269160777330399, | |
| "learning_rate": 3.8064321608040205e-06, | |
| "loss": 0.0002, | |
| "step": 62150 | |
| }, | |
| { | |
| "epoch": 31.32, | |
| "grad_norm": 0.5558028817176819, | |
| "learning_rate": 3.8039195979899502e-06, | |
| "loss": 0.0002, | |
| "step": 62175 | |
| }, | |
| { | |
| "epoch": 31.34, | |
| "grad_norm": 0.03976639732718468, | |
| "learning_rate": 3.8014070351758796e-06, | |
| "loss": 0.0002, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 31.35, | |
| "grad_norm": 0.10835079848766327, | |
| "learning_rate": 3.7988944723618093e-06, | |
| "loss": 0.0002, | |
| "step": 62225 | |
| }, | |
| { | |
| "epoch": 31.36, | |
| "grad_norm": 1.0740280151367188, | |
| "learning_rate": 3.796381909547739e-06, | |
| "loss": 0.0002, | |
| "step": 62250 | |
| }, | |
| { | |
| "epoch": 31.37, | |
| "grad_norm": 0.20788809657096863, | |
| "learning_rate": 3.7938693467336684e-06, | |
| "loss": 0.0003, | |
| "step": 62275 | |
| }, | |
| { | |
| "epoch": 31.39, | |
| "grad_norm": 0.331663578748703, | |
| "learning_rate": 3.791356783919598e-06, | |
| "loss": 0.0005, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 31.4, | |
| "grad_norm": 0.09669731557369232, | |
| "learning_rate": 3.7888442211055283e-06, | |
| "loss": 0.0002, | |
| "step": 62325 | |
| }, | |
| { | |
| "epoch": 31.41, | |
| "grad_norm": 0.12564200162887573, | |
| "learning_rate": 3.7863316582914576e-06, | |
| "loss": 0.0002, | |
| "step": 62350 | |
| }, | |
| { | |
| "epoch": 31.42, | |
| "grad_norm": 0.16016307473182678, | |
| "learning_rate": 3.7838190954773874e-06, | |
| "loss": 0.0003, | |
| "step": 62375 | |
| }, | |
| { | |
| "epoch": 31.44, | |
| "grad_norm": 0.09345220774412155, | |
| "learning_rate": 3.781306532663317e-06, | |
| "loss": 0.0003, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 31.45, | |
| "grad_norm": 1.2475889921188354, | |
| "learning_rate": 3.7787939698492464e-06, | |
| "loss": 0.0002, | |
| "step": 62425 | |
| }, | |
| { | |
| "epoch": 31.46, | |
| "grad_norm": 0.4350406229496002, | |
| "learning_rate": 3.776281407035176e-06, | |
| "loss": 0.0003, | |
| "step": 62450 | |
| }, | |
| { | |
| "epoch": 31.47, | |
| "grad_norm": 0.13535748422145844, | |
| "learning_rate": 3.7737688442211055e-06, | |
| "loss": 0.0002, | |
| "step": 62475 | |
| }, | |
| { | |
| "epoch": 31.49, | |
| "grad_norm": 0.34315505623817444, | |
| "learning_rate": 3.7712562814070352e-06, | |
| "loss": 0.0004, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "grad_norm": 1.264566421508789, | |
| "learning_rate": 3.7687437185929654e-06, | |
| "loss": 0.0004, | |
| "step": 62525 | |
| }, | |
| { | |
| "epoch": 31.51, | |
| "grad_norm": 0.1144946962594986, | |
| "learning_rate": 3.766231155778895e-06, | |
| "loss": 0.0004, | |
| "step": 62550 | |
| }, | |
| { | |
| "epoch": 31.52, | |
| "grad_norm": 0.1550832986831665, | |
| "learning_rate": 3.7637185929648245e-06, | |
| "loss": 0.0007, | |
| "step": 62575 | |
| }, | |
| { | |
| "epoch": 31.54, | |
| "grad_norm": 1.2980326414108276, | |
| "learning_rate": 3.7612060301507542e-06, | |
| "loss": 0.0006, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 31.55, | |
| "grad_norm": 0.5655810832977295, | |
| "learning_rate": 3.7586934673366836e-06, | |
| "loss": 0.0004, | |
| "step": 62625 | |
| }, | |
| { | |
| "epoch": 31.56, | |
| "grad_norm": 1.1469320058822632, | |
| "learning_rate": 3.7561809045226133e-06, | |
| "loss": 0.0003, | |
| "step": 62650 | |
| }, | |
| { | |
| "epoch": 31.57, | |
| "grad_norm": 0.10017550736665726, | |
| "learning_rate": 3.753668341708543e-06, | |
| "loss": 0.0003, | |
| "step": 62675 | |
| }, | |
| { | |
| "epoch": 31.59, | |
| "grad_norm": 0.0341104120016098, | |
| "learning_rate": 3.7511557788944724e-06, | |
| "loss": 0.0005, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 31.6, | |
| "grad_norm": 0.1871260702610016, | |
| "learning_rate": 3.748643216080402e-06, | |
| "loss": 0.0004, | |
| "step": 62725 | |
| }, | |
| { | |
| "epoch": 31.61, | |
| "grad_norm": 0.21561290323734283, | |
| "learning_rate": 3.7461306532663323e-06, | |
| "loss": 0.0003, | |
| "step": 62750 | |
| }, | |
| { | |
| "epoch": 31.62, | |
| "grad_norm": 0.1019524484872818, | |
| "learning_rate": 3.7436180904522616e-06, | |
| "loss": 0.0004, | |
| "step": 62775 | |
| }, | |
| { | |
| "epoch": 31.64, | |
| "grad_norm": 0.6602054834365845, | |
| "learning_rate": 3.7411055276381914e-06, | |
| "loss": 0.0003, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 31.65, | |
| "grad_norm": 0.06545541435480118, | |
| "learning_rate": 3.738592964824121e-06, | |
| "loss": 0.0006, | |
| "step": 62825 | |
| }, | |
| { | |
| "epoch": 31.66, | |
| "grad_norm": 0.6719912886619568, | |
| "learning_rate": 3.7360804020100504e-06, | |
| "loss": 0.0005, | |
| "step": 62850 | |
| }, | |
| { | |
| "epoch": 31.68, | |
| "grad_norm": 0.23920761048793793, | |
| "learning_rate": 3.73356783919598e-06, | |
| "loss": 0.0004, | |
| "step": 62875 | |
| }, | |
| { | |
| "epoch": 31.69, | |
| "grad_norm": 0.36367443203926086, | |
| "learning_rate": 3.7310552763819095e-06, | |
| "loss": 0.0004, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 31.7, | |
| "grad_norm": 0.1650412529706955, | |
| "learning_rate": 3.7285427135678392e-06, | |
| "loss": 0.0004, | |
| "step": 62925 | |
| }, | |
| { | |
| "epoch": 31.71, | |
| "grad_norm": 0.22094900906085968, | |
| "learning_rate": 3.7260301507537694e-06, | |
| "loss": 0.0003, | |
| "step": 62950 | |
| }, | |
| { | |
| "epoch": 31.73, | |
| "grad_norm": 0.06936586648225784, | |
| "learning_rate": 3.723517587939699e-06, | |
| "loss": 0.0003, | |
| "step": 62975 | |
| }, | |
| { | |
| "epoch": 31.74, | |
| "grad_norm": 0.3549976348876953, | |
| "learning_rate": 3.7210050251256285e-06, | |
| "loss": 0.0003, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 31.74, | |
| "eval_loss": 0.375165730714798, | |
| "eval_runtime": 654.2645, | |
| "eval_samples_per_second": 2.154, | |
| "eval_steps_per_second": 2.154, | |
| "eval_wer": 22.912487028709787, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 31.75, | |
| "grad_norm": 0.06242278590798378, | |
| "learning_rate": 3.7184924623115582e-06, | |
| "loss": 0.0003, | |
| "step": 63025 | |
| }, | |
| { | |
| "epoch": 31.76, | |
| "grad_norm": 0.029787451028823853, | |
| "learning_rate": 3.7159798994974876e-06, | |
| "loss": 0.0005, | |
| "step": 63050 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "grad_norm": 0.12795278429985046, | |
| "learning_rate": 3.7134673366834173e-06, | |
| "loss": 0.0004, | |
| "step": 63075 | |
| }, | |
| { | |
| "epoch": 31.79, | |
| "grad_norm": 0.10121666640043259, | |
| "learning_rate": 3.710954773869347e-06, | |
| "loss": 0.0006, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 31.8, | |
| "grad_norm": 0.10796695947647095, | |
| "learning_rate": 3.7084422110552764e-06, | |
| "loss": 0.0004, | |
| "step": 63125 | |
| }, | |
| { | |
| "epoch": 31.81, | |
| "grad_norm": 0.06431049853563309, | |
| "learning_rate": 3.7059296482412065e-06, | |
| "loss": 0.0006, | |
| "step": 63150 | |
| }, | |
| { | |
| "epoch": 31.83, | |
| "grad_norm": 0.07762473076581955, | |
| "learning_rate": 3.7034170854271363e-06, | |
| "loss": 0.0003, | |
| "step": 63175 | |
| }, | |
| { | |
| "epoch": 31.84, | |
| "grad_norm": 0.19045744836330414, | |
| "learning_rate": 3.7009045226130656e-06, | |
| "loss": 0.0003, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 31.85, | |
| "grad_norm": 0.495317667722702, | |
| "learning_rate": 3.6983919597989954e-06, | |
| "loss": 0.0008, | |
| "step": 63225 | |
| }, | |
| { | |
| "epoch": 31.86, | |
| "grad_norm": 0.44762441515922546, | |
| "learning_rate": 3.695879396984925e-06, | |
| "loss": 0.0003, | |
| "step": 63250 | |
| }, | |
| { | |
| "epoch": 31.88, | |
| "grad_norm": 0.7618858218193054, | |
| "learning_rate": 3.6933668341708544e-06, | |
| "loss": 0.0007, | |
| "step": 63275 | |
| }, | |
| { | |
| "epoch": 31.89, | |
| "grad_norm": 0.24009497463703156, | |
| "learning_rate": 3.690854271356784e-06, | |
| "loss": 0.0003, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 31.9, | |
| "grad_norm": 0.22943466901779175, | |
| "learning_rate": 3.6883417085427135e-06, | |
| "loss": 0.0003, | |
| "step": 63325 | |
| }, | |
| { | |
| "epoch": 31.91, | |
| "grad_norm": 0.06694609671831131, | |
| "learning_rate": 3.6858291457286432e-06, | |
| "loss": 0.0005, | |
| "step": 63350 | |
| }, | |
| { | |
| "epoch": 31.93, | |
| "grad_norm": 0.21812428534030914, | |
| "learning_rate": 3.6833165829145734e-06, | |
| "loss": 0.0003, | |
| "step": 63375 | |
| }, | |
| { | |
| "epoch": 31.94, | |
| "grad_norm": 0.28397077322006226, | |
| "learning_rate": 3.680804020100503e-06, | |
| "loss": 0.0002, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 31.95, | |
| "grad_norm": 1.7889050245285034, | |
| "learning_rate": 3.6782914572864325e-06, | |
| "loss": 0.0005, | |
| "step": 63425 | |
| }, | |
| { | |
| "epoch": 31.96, | |
| "grad_norm": 0.6206020712852478, | |
| "learning_rate": 3.6757788944723622e-06, | |
| "loss": 0.0004, | |
| "step": 63450 | |
| }, | |
| { | |
| "epoch": 31.98, | |
| "grad_norm": 0.1217232197523117, | |
| "learning_rate": 3.6732663316582916e-06, | |
| "loss": 0.0004, | |
| "step": 63475 | |
| }, | |
| { | |
| "epoch": 31.99, | |
| "grad_norm": 0.545870840549469, | |
| "learning_rate": 3.6707537688442213e-06, | |
| "loss": 0.0003, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "grad_norm": 0.3442104160785675, | |
| "learning_rate": 3.668241206030151e-06, | |
| "loss": 0.0004, | |
| "step": 63525 | |
| }, | |
| { | |
| "epoch": 32.02, | |
| "grad_norm": 0.10596601665019989, | |
| "learning_rate": 3.6657286432160804e-06, | |
| "loss": 0.0002, | |
| "step": 63550 | |
| }, | |
| { | |
| "epoch": 32.03, | |
| "grad_norm": 0.12127941101789474, | |
| "learning_rate": 3.6632160804020105e-06, | |
| "loss": 0.0002, | |
| "step": 63575 | |
| }, | |
| { | |
| "epoch": 32.04, | |
| "grad_norm": 0.05940447375178337, | |
| "learning_rate": 3.6607035175879403e-06, | |
| "loss": 0.0004, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 32.05, | |
| "grad_norm": 0.21149925887584686, | |
| "learning_rate": 3.6581909547738696e-06, | |
| "loss": 0.0001, | |
| "step": 63625 | |
| }, | |
| { | |
| "epoch": 32.07, | |
| "grad_norm": 1.6386170387268066, | |
| "learning_rate": 3.6556783919597994e-06, | |
| "loss": 0.0002, | |
| "step": 63650 | |
| }, | |
| { | |
| "epoch": 32.08, | |
| "grad_norm": 1.5819129943847656, | |
| "learning_rate": 3.653165829145729e-06, | |
| "loss": 0.0004, | |
| "step": 63675 | |
| }, | |
| { | |
| "epoch": 32.09, | |
| "grad_norm": 0.5431501865386963, | |
| "learning_rate": 3.6506532663316584e-06, | |
| "loss": 0.0002, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 32.1, | |
| "grad_norm": 0.059780336916446686, | |
| "learning_rate": 3.648140703517588e-06, | |
| "loss": 0.0004, | |
| "step": 63725 | |
| }, | |
| { | |
| "epoch": 32.12, | |
| "grad_norm": 0.3245247006416321, | |
| "learning_rate": 3.6456281407035175e-06, | |
| "loss": 0.0002, | |
| "step": 63750 | |
| }, | |
| { | |
| "epoch": 32.13, | |
| "grad_norm": 0.27430394291877747, | |
| "learning_rate": 3.6431155778894477e-06, | |
| "loss": 0.0002, | |
| "step": 63775 | |
| }, | |
| { | |
| "epoch": 32.14, | |
| "grad_norm": 0.3374156653881073, | |
| "learning_rate": 3.6406030150753774e-06, | |
| "loss": 0.0003, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 32.15, | |
| "grad_norm": 0.5428460836410522, | |
| "learning_rate": 3.638090452261307e-06, | |
| "loss": 0.0002, | |
| "step": 63825 | |
| }, | |
| { | |
| "epoch": 32.17, | |
| "grad_norm": 0.02909483201801777, | |
| "learning_rate": 3.6355778894472365e-06, | |
| "loss": 0.0003, | |
| "step": 63850 | |
| }, | |
| { | |
| "epoch": 32.18, | |
| "grad_norm": 0.05827973410487175, | |
| "learning_rate": 3.6330653266331662e-06, | |
| "loss": 0.0003, | |
| "step": 63875 | |
| }, | |
| { | |
| "epoch": 32.19, | |
| "grad_norm": 0.5720663666725159, | |
| "learning_rate": 3.6305527638190956e-06, | |
| "loss": 0.0003, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 32.2, | |
| "grad_norm": 0.09780346602201462, | |
| "learning_rate": 3.6280402010050253e-06, | |
| "loss": 0.0002, | |
| "step": 63925 | |
| }, | |
| { | |
| "epoch": 32.22, | |
| "grad_norm": 0.061296120285987854, | |
| "learning_rate": 3.625527638190955e-06, | |
| "loss": 0.0002, | |
| "step": 63950 | |
| }, | |
| { | |
| "epoch": 32.23, | |
| "grad_norm": 0.14354734122753143, | |
| "learning_rate": 3.6230150753768844e-06, | |
| "loss": 0.0003, | |
| "step": 63975 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "grad_norm": 0.1332835853099823, | |
| "learning_rate": 3.6205025125628145e-06, | |
| "loss": 0.0003, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "eval_loss": 0.37142670154571533, | |
| "eval_runtime": 650.391, | |
| "eval_samples_per_second": 2.166, | |
| "eval_steps_per_second": 2.166, | |
| "eval_wer": 22.359045313040472, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 32.25, | |
| "grad_norm": 0.06854286044836044, | |
| "learning_rate": 3.6179899497487443e-06, | |
| "loss": 0.0002, | |
| "step": 64025 | |
| }, | |
| { | |
| "epoch": 32.27, | |
| "grad_norm": 1.202950358390808, | |
| "learning_rate": 3.6154773869346736e-06, | |
| "loss": 0.0002, | |
| "step": 64050 | |
| }, | |
| { | |
| "epoch": 32.28, | |
| "grad_norm": 0.23010912537574768, | |
| "learning_rate": 3.6129648241206034e-06, | |
| "loss": 0.0002, | |
| "step": 64075 | |
| }, | |
| { | |
| "epoch": 32.29, | |
| "grad_norm": 0.044724371284246445, | |
| "learning_rate": 3.610452261306533e-06, | |
| "loss": 0.0002, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 32.3, | |
| "grad_norm": 0.8325422406196594, | |
| "learning_rate": 3.6079396984924624e-06, | |
| "loss": 0.0003, | |
| "step": 64125 | |
| }, | |
| { | |
| "epoch": 32.32, | |
| "grad_norm": 0.28481706976890564, | |
| "learning_rate": 3.605427135678392e-06, | |
| "loss": 0.0002, | |
| "step": 64150 | |
| }, | |
| { | |
| "epoch": 32.33, | |
| "grad_norm": 0.5033039450645447, | |
| "learning_rate": 3.6029145728643215e-06, | |
| "loss": 0.0002, | |
| "step": 64175 | |
| }, | |
| { | |
| "epoch": 32.34, | |
| "grad_norm": 0.07772762328386307, | |
| "learning_rate": 3.6004020100502517e-06, | |
| "loss": 0.0003, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 32.36, | |
| "grad_norm": 0.13087120652198792, | |
| "learning_rate": 3.5978894472361814e-06, | |
| "loss": 0.0003, | |
| "step": 64225 | |
| }, | |
| { | |
| "epoch": 32.37, | |
| "grad_norm": 0.05218727886676788, | |
| "learning_rate": 3.595376884422111e-06, | |
| "loss": 0.0002, | |
| "step": 64250 | |
| }, | |
| { | |
| "epoch": 32.38, | |
| "grad_norm": 0.09140007197856903, | |
| "learning_rate": 3.5928643216080405e-06, | |
| "loss": 0.0002, | |
| "step": 64275 | |
| }, | |
| { | |
| "epoch": 32.39, | |
| "grad_norm": 0.2148062288761139, | |
| "learning_rate": 3.5903517587939702e-06, | |
| "loss": 0.0002, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 32.41, | |
| "grad_norm": 0.3405974805355072, | |
| "learning_rate": 3.5878391959798996e-06, | |
| "loss": 0.0003, | |
| "step": 64325 | |
| }, | |
| { | |
| "epoch": 32.42, | |
| "grad_norm": 0.6667714715003967, | |
| "learning_rate": 3.5853266331658293e-06, | |
| "loss": 0.0006, | |
| "step": 64350 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "grad_norm": 0.07142732292413712, | |
| "learning_rate": 3.582814070351759e-06, | |
| "loss": 0.0005, | |
| "step": 64375 | |
| }, | |
| { | |
| "epoch": 32.44, | |
| "grad_norm": 0.34170547127723694, | |
| "learning_rate": 3.5803015075376884e-06, | |
| "loss": 0.0003, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 32.46, | |
| "grad_norm": 0.051813945174217224, | |
| "learning_rate": 3.5777889447236185e-06, | |
| "loss": 0.0002, | |
| "step": 64425 | |
| }, | |
| { | |
| "epoch": 32.47, | |
| "grad_norm": 0.035720087587833405, | |
| "learning_rate": 3.5752763819095483e-06, | |
| "loss": 0.0003, | |
| "step": 64450 | |
| }, | |
| { | |
| "epoch": 32.48, | |
| "grad_norm": 0.26840466260910034, | |
| "learning_rate": 3.5727638190954776e-06, | |
| "loss": 0.0002, | |
| "step": 64475 | |
| }, | |
| { | |
| "epoch": 32.49, | |
| "grad_norm": 0.23383192718029022, | |
| "learning_rate": 3.5702512562814074e-06, | |
| "loss": 0.0006, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 32.51, | |
| "grad_norm": 0.6334074139595032, | |
| "learning_rate": 3.567738693467337e-06, | |
| "loss": 0.0003, | |
| "step": 64525 | |
| }, | |
| { | |
| "epoch": 32.52, | |
| "grad_norm": 0.14389067888259888, | |
| "learning_rate": 3.5652261306532664e-06, | |
| "loss": 0.0003, | |
| "step": 64550 | |
| }, | |
| { | |
| "epoch": 32.53, | |
| "grad_norm": 0.1418575793504715, | |
| "learning_rate": 3.562713567839196e-06, | |
| "loss": 0.0005, | |
| "step": 64575 | |
| }, | |
| { | |
| "epoch": 32.54, | |
| "grad_norm": 1.003585934638977, | |
| "learning_rate": 3.5602010050251255e-06, | |
| "loss": 0.0006, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 32.56, | |
| "grad_norm": 0.30060967803001404, | |
| "learning_rate": 3.5576884422110557e-06, | |
| "loss": 0.0004, | |
| "step": 64625 | |
| }, | |
| { | |
| "epoch": 32.57, | |
| "grad_norm": 0.2555244266986847, | |
| "learning_rate": 3.5551758793969854e-06, | |
| "loss": 0.0005, | |
| "step": 64650 | |
| }, | |
| { | |
| "epoch": 32.58, | |
| "grad_norm": 0.5695326328277588, | |
| "learning_rate": 3.552663316582915e-06, | |
| "loss": 0.0005, | |
| "step": 64675 | |
| }, | |
| { | |
| "epoch": 32.59, | |
| "grad_norm": 1.1138029098510742, | |
| "learning_rate": 3.5501507537688445e-06, | |
| "loss": 0.0005, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 32.61, | |
| "grad_norm": 0.999645471572876, | |
| "learning_rate": 3.5476381909547742e-06, | |
| "loss": 0.0002, | |
| "step": 64725 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "grad_norm": 0.08828813582658768, | |
| "learning_rate": 3.5451256281407036e-06, | |
| "loss": 0.0005, | |
| "step": 64750 | |
| }, | |
| { | |
| "epoch": 32.63, | |
| "grad_norm": 0.41186901926994324, | |
| "learning_rate": 3.5426130653266333e-06, | |
| "loss": 0.0004, | |
| "step": 64775 | |
| }, | |
| { | |
| "epoch": 32.64, | |
| "grad_norm": 0.2822958528995514, | |
| "learning_rate": 3.540100502512563e-06, | |
| "loss": 0.0004, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 32.66, | |
| "grad_norm": 0.0504324808716774, | |
| "learning_rate": 3.5375879396984932e-06, | |
| "loss": 0.0003, | |
| "step": 64825 | |
| }, | |
| { | |
| "epoch": 32.67, | |
| "grad_norm": 0.19442777335643768, | |
| "learning_rate": 3.5350753768844225e-06, | |
| "loss": 0.0003, | |
| "step": 64850 | |
| }, | |
| { | |
| "epoch": 32.68, | |
| "grad_norm": 0.16309334337711334, | |
| "learning_rate": 3.5325628140703523e-06, | |
| "loss": 0.0005, | |
| "step": 64875 | |
| }, | |
| { | |
| "epoch": 32.7, | |
| "grad_norm": 0.21436728537082672, | |
| "learning_rate": 3.5300502512562816e-06, | |
| "loss": 0.0005, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 32.71, | |
| "grad_norm": 0.6013718843460083, | |
| "learning_rate": 3.5275376884422114e-06, | |
| "loss": 0.0005, | |
| "step": 64925 | |
| }, | |
| { | |
| "epoch": 32.72, | |
| "grad_norm": 0.22422859072685242, | |
| "learning_rate": 3.525025125628141e-06, | |
| "loss": 0.0003, | |
| "step": 64950 | |
| }, | |
| { | |
| "epoch": 32.73, | |
| "grad_norm": 0.1287311613559723, | |
| "learning_rate": 3.5225125628140704e-06, | |
| "loss": 0.0003, | |
| "step": 64975 | |
| }, | |
| { | |
| "epoch": 32.75, | |
| "grad_norm": 0.11317762732505798, | |
| "learning_rate": 3.52e-06, | |
| "loss": 0.0006, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 32.75, | |
| "eval_loss": 0.37727558612823486, | |
| "eval_runtime": 645.9956, | |
| "eval_samples_per_second": 2.181, | |
| "eval_steps_per_second": 2.181, | |
| "eval_wer": 22.953995157384988, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 32.76, | |
| "grad_norm": 0.30392730236053467, | |
| "learning_rate": 3.5174874371859295e-06, | |
| "loss": 0.0003, | |
| "step": 65025 | |
| }, | |
| { | |
| "epoch": 32.77, | |
| "grad_norm": 0.1481235772371292, | |
| "learning_rate": 3.5149748743718597e-06, | |
| "loss": 0.0006, | |
| "step": 65050 | |
| }, | |
| { | |
| "epoch": 32.78, | |
| "grad_norm": 0.7122224569320679, | |
| "learning_rate": 3.5124623115577894e-06, | |
| "loss": 0.0005, | |
| "step": 65075 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "grad_norm": 0.9053061604499817, | |
| "learning_rate": 3.509949748743719e-06, | |
| "loss": 0.0004, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 32.81, | |
| "grad_norm": 1.0469900369644165, | |
| "learning_rate": 3.5074371859296485e-06, | |
| "loss": 0.0004, | |
| "step": 65125 | |
| }, | |
| { | |
| "epoch": 32.82, | |
| "grad_norm": 1.669203519821167, | |
| "learning_rate": 3.5049246231155782e-06, | |
| "loss": 0.0004, | |
| "step": 65150 | |
| }, | |
| { | |
| "epoch": 32.83, | |
| "grad_norm": 1.147189974784851, | |
| "learning_rate": 3.5024120603015076e-06, | |
| "loss": 0.0006, | |
| "step": 65175 | |
| }, | |
| { | |
| "epoch": 32.85, | |
| "grad_norm": 0.0693323016166687, | |
| "learning_rate": 3.4998994974874373e-06, | |
| "loss": 0.0004, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 32.86, | |
| "grad_norm": 0.32327863574028015, | |
| "learning_rate": 3.497386934673367e-06, | |
| "loss": 0.0006, | |
| "step": 65225 | |
| }, | |
| { | |
| "epoch": 32.87, | |
| "grad_norm": 0.20139850676059723, | |
| "learning_rate": 3.494874371859297e-06, | |
| "loss": 0.0005, | |
| "step": 65250 | |
| }, | |
| { | |
| "epoch": 32.88, | |
| "grad_norm": 0.3941897749900818, | |
| "learning_rate": 3.4923618090452265e-06, | |
| "loss": 0.0003, | |
| "step": 65275 | |
| }, | |
| { | |
| "epoch": 32.9, | |
| "grad_norm": 0.036489930003881454, | |
| "learning_rate": 3.4898492462311563e-06, | |
| "loss": 0.0006, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 32.91, | |
| "grad_norm": 0.09961450099945068, | |
| "learning_rate": 3.4873366834170856e-06, | |
| "loss": 0.0004, | |
| "step": 65325 | |
| }, | |
| { | |
| "epoch": 32.92, | |
| "grad_norm": 0.10360651463270187, | |
| "learning_rate": 3.4848241206030154e-06, | |
| "loss": 0.0004, | |
| "step": 65350 | |
| }, | |
| { | |
| "epoch": 32.93, | |
| "grad_norm": 1.0170046091079712, | |
| "learning_rate": 3.482311557788945e-06, | |
| "loss": 0.0005, | |
| "step": 65375 | |
| }, | |
| { | |
| "epoch": 32.95, | |
| "grad_norm": 0.1749623417854309, | |
| "learning_rate": 3.4797989949748744e-06, | |
| "loss": 0.0003, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 32.96, | |
| "grad_norm": 1.7965832948684692, | |
| "learning_rate": 3.477286432160804e-06, | |
| "loss": 0.0004, | |
| "step": 65425 | |
| }, | |
| { | |
| "epoch": 32.97, | |
| "grad_norm": 0.4475008547306061, | |
| "learning_rate": 3.4747738693467344e-06, | |
| "loss": 0.0004, | |
| "step": 65450 | |
| }, | |
| { | |
| "epoch": 32.98, | |
| "grad_norm": 0.29991576075553894, | |
| "learning_rate": 3.4722613065326637e-06, | |
| "loss": 0.0004, | |
| "step": 65475 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "grad_norm": 0.0659874826669693, | |
| "learning_rate": 3.4697487437185934e-06, | |
| "loss": 0.0005, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 33.01, | |
| "grad_norm": 0.15273946523666382, | |
| "learning_rate": 3.467336683417086e-06, | |
| "loss": 0.0004, | |
| "step": 65525 | |
| }, | |
| { | |
| "epoch": 33.02, | |
| "grad_norm": 0.09499111026525497, | |
| "learning_rate": 3.464824120603015e-06, | |
| "loss": 0.0005, | |
| "step": 65550 | |
| }, | |
| { | |
| "epoch": 33.04, | |
| "grad_norm": 0.052725620567798615, | |
| "learning_rate": 3.462311557788945e-06, | |
| "loss": 0.0002, | |
| "step": 65575 | |
| }, | |
| { | |
| "epoch": 33.05, | |
| "grad_norm": 0.30198460817337036, | |
| "learning_rate": 3.4597989949748746e-06, | |
| "loss": 0.0002, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 33.06, | |
| "grad_norm": 0.12597815692424774, | |
| "learning_rate": 3.457286432160804e-06, | |
| "loss": 0.0003, | |
| "step": 65625 | |
| }, | |
| { | |
| "epoch": 33.07, | |
| "grad_norm": 0.7220773100852966, | |
| "learning_rate": 3.454773869346734e-06, | |
| "loss": 0.0002, | |
| "step": 65650 | |
| }, | |
| { | |
| "epoch": 33.09, | |
| "grad_norm": 0.14168275892734528, | |
| "learning_rate": 3.452261306532664e-06, | |
| "loss": 0.0002, | |
| "step": 65675 | |
| }, | |
| { | |
| "epoch": 33.1, | |
| "grad_norm": 0.06495074182748795, | |
| "learning_rate": 3.449748743718593e-06, | |
| "loss": 0.0002, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 33.11, | |
| "grad_norm": 0.06904838979244232, | |
| "learning_rate": 3.447236180904523e-06, | |
| "loss": 0.0001, | |
| "step": 65725 | |
| }, | |
| { | |
| "epoch": 33.12, | |
| "grad_norm": 0.08821985125541687, | |
| "learning_rate": 3.4447236180904527e-06, | |
| "loss": 0.0001, | |
| "step": 65750 | |
| }, | |
| { | |
| "epoch": 33.14, | |
| "grad_norm": 0.08516985177993774, | |
| "learning_rate": 3.442211055276382e-06, | |
| "loss": 0.0002, | |
| "step": 65775 | |
| }, | |
| { | |
| "epoch": 33.15, | |
| "grad_norm": 0.05491633340716362, | |
| "learning_rate": 3.4396984924623118e-06, | |
| "loss": 0.0002, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 33.16, | |
| "grad_norm": 0.09583797305822372, | |
| "learning_rate": 3.437185929648241e-06, | |
| "loss": 0.0001, | |
| "step": 65825 | |
| }, | |
| { | |
| "epoch": 33.17, | |
| "grad_norm": 0.046035125851631165, | |
| "learning_rate": 3.4346733668341712e-06, | |
| "loss": 0.0002, | |
| "step": 65850 | |
| }, | |
| { | |
| "epoch": 33.19, | |
| "grad_norm": 0.04913106560707092, | |
| "learning_rate": 3.432160804020101e-06, | |
| "loss": 0.0002, | |
| "step": 65875 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "grad_norm": 0.0908324271440506, | |
| "learning_rate": 3.4296482412060307e-06, | |
| "loss": 0.0002, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 33.21, | |
| "grad_norm": 0.1352541148662567, | |
| "learning_rate": 3.42713567839196e-06, | |
| "loss": 0.0001, | |
| "step": 65925 | |
| }, | |
| { | |
| "epoch": 33.22, | |
| "grad_norm": 0.12912270426750183, | |
| "learning_rate": 3.42462311557789e-06, | |
| "loss": 0.0002, | |
| "step": 65950 | |
| }, | |
| { | |
| "epoch": 33.24, | |
| "grad_norm": 0.19946007430553436, | |
| "learning_rate": 3.422110552763819e-06, | |
| "loss": 0.0001, | |
| "step": 65975 | |
| }, | |
| { | |
| "epoch": 33.25, | |
| "grad_norm": 0.15442493557929993, | |
| "learning_rate": 3.419597989949749e-06, | |
| "loss": 0.0001, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 33.25, | |
| "eval_loss": 0.37603169679641724, | |
| "eval_runtime": 780.8153, | |
| "eval_samples_per_second": 1.805, | |
| "eval_steps_per_second": 1.805, | |
| "eval_wer": 22.234520927014874, | |
| "step": 66000 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 100000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 51, | |
| "save_steps": 1000, | |
| "total_flos": 2.0547646783488e+20, | |
| "train_batch_size": 48, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |