| { | |
| "best_global_step": 8500, | |
| "best_metric": 0.3142754137516022, | |
| "best_model_checkpoint": "./Wav2vec2-fula/checkpoint-8500", | |
| "epoch": 7.565045540470871, | |
| "eval_steps": 500, | |
| "global_step": 11000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017185083347654236, | |
| "grad_norm": 10.79065990447998, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 14.779, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03437016669530847, | |
| "grad_norm": 11.638880729675293, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 13.4402, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05155525004296271, | |
| "grad_norm": 12.49268913269043, | |
| "learning_rate": 3.6499999999999998e-06, | |
| "loss": 14.1815, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06874033339061694, | |
| "grad_norm": 13.672736167907715, | |
| "learning_rate": 4.9000000000000005e-06, | |
| "loss": 12.3835, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08592541673827118, | |
| "grad_norm": 13.192852973937988, | |
| "learning_rate": 6.15e-06, | |
| "loss": 11.3532, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.10311050008592541, | |
| "grad_norm": 13.134611129760742, | |
| "learning_rate": 7.4e-06, | |
| "loss": 7.4199, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12029558343357966, | |
| "grad_norm": 10.945942878723145, | |
| "learning_rate": 8.65e-06, | |
| "loss": 6.3555, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.13748066678123388, | |
| "grad_norm": 10.094327926635742, | |
| "learning_rate": 9.900000000000002e-06, | |
| "loss": 5.2388, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1546657501288881, | |
| "grad_norm": 9.148414611816406, | |
| "learning_rate": 1.115e-05, | |
| "loss": 5.0771, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.17185083347654237, | |
| "grad_norm": 7.876718997955322, | |
| "learning_rate": 1.24e-05, | |
| "loss": 4.6181, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1890359168241966, | |
| "grad_norm": 7.483435153961182, | |
| "learning_rate": 1.3650000000000001e-05, | |
| "loss": 4.5182, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.20622100017185083, | |
| "grad_norm": 6.882397174835205, | |
| "learning_rate": 1.49e-05, | |
| "loss": 4.1879, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22340608351950508, | |
| "grad_norm": 6.444328308105469, | |
| "learning_rate": 1.6150000000000003e-05, | |
| "loss": 4.1217, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2405911668671593, | |
| "grad_norm": 4.407646179199219, | |
| "learning_rate": 1.74e-05, | |
| "loss": 3.8535, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.25777625021481354, | |
| "grad_norm": 4.332294940948486, | |
| "learning_rate": 1.865e-05, | |
| "loss": 3.7725, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.27496133356246777, | |
| "grad_norm": 3.5310115814208984, | |
| "learning_rate": 1.9900000000000003e-05, | |
| "loss": 3.5538, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.292146416910122, | |
| "grad_norm": 2.884195566177368, | |
| "learning_rate": 2.115e-05, | |
| "loss": 3.4667, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.3093315002577762, | |
| "grad_norm": 2.221975564956665, | |
| "learning_rate": 2.2400000000000002e-05, | |
| "loss": 3.3157, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3265165836054305, | |
| "grad_norm": 1.393004059791565, | |
| "learning_rate": 2.365e-05, | |
| "loss": 3.2574, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.34370166695308474, | |
| "grad_norm": 1.5566725730895996, | |
| "learning_rate": 2.4900000000000002e-05, | |
| "loss": 3.1344, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34370166695308474, | |
| "eval_loss": 3.093510150909424, | |
| "eval_runtime": 148.9649, | |
| "eval_samples_per_second": 8.223, | |
| "eval_steps_per_second": 1.034, | |
| "eval_wer": 1.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.36088675030073897, | |
| "grad_norm": 1.1272865533828735, | |
| "learning_rate": 2.6150000000000002e-05, | |
| "loss": 3.0829, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3780718336483932, | |
| "grad_norm": 0.8798616528511047, | |
| "learning_rate": 2.7400000000000002e-05, | |
| "loss": 3.0138, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3952569169960474, | |
| "grad_norm": 1.233052134513855, | |
| "learning_rate": 2.865e-05, | |
| "loss": 2.9976, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.41244200034370165, | |
| "grad_norm": 0.4763319492340088, | |
| "learning_rate": 2.9900000000000002e-05, | |
| "loss": 2.9609, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4296270836913559, | |
| "grad_norm": 0.35058021545410156, | |
| "learning_rate": 3.115e-05, | |
| "loss": 2.9375, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.44681216703901017, | |
| "grad_norm": 0.9661968946456909, | |
| "learning_rate": 3.24e-05, | |
| "loss": 2.8961, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4639972503866644, | |
| "grad_norm": 1.0027278661727905, | |
| "learning_rate": 3.3650000000000005e-05, | |
| "loss": 2.8169, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.4811823337343186, | |
| "grad_norm": 0.6780478358268738, | |
| "learning_rate": 3.49e-05, | |
| "loss": 2.674, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.49836741708197285, | |
| "grad_norm": 0.7937625646591187, | |
| "learning_rate": 3.615e-05, | |
| "loss": 2.4667, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5155525004296271, | |
| "grad_norm": 1.0229036808013916, | |
| "learning_rate": 3.74e-05, | |
| "loss": 2.2186, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5327375837772813, | |
| "grad_norm": 1.1023578643798828, | |
| "learning_rate": 3.8650000000000004e-05, | |
| "loss": 1.9797, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5499226671249355, | |
| "grad_norm": 1.084370493888855, | |
| "learning_rate": 3.99e-05, | |
| "loss": 1.6958, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5671077504725898, | |
| "grad_norm": 0.8571304082870483, | |
| "learning_rate": 4.115e-05, | |
| "loss": 1.3521, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.584292833820244, | |
| "grad_norm": 1.0564861297607422, | |
| "learning_rate": 4.24e-05, | |
| "loss": 1.1168, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6014779171678982, | |
| "grad_norm": 0.8157157301902771, | |
| "learning_rate": 4.3650000000000004e-05, | |
| "loss": 0.9829, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6186630005155525, | |
| "grad_norm": 1.098561406135559, | |
| "learning_rate": 4.49e-05, | |
| "loss": 0.904, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6358480838632067, | |
| "grad_norm": 0.6373503804206848, | |
| "learning_rate": 4.6150000000000004e-05, | |
| "loss": 0.8686, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.653033167210861, | |
| "grad_norm": 1.2277697324752808, | |
| "learning_rate": 4.74e-05, | |
| "loss": 0.7969, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6702182505585152, | |
| "grad_norm": 0.5907439589500427, | |
| "learning_rate": 4.8650000000000003e-05, | |
| "loss": 0.7524, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6874033339061695, | |
| "grad_norm": 0.7881184220314026, | |
| "learning_rate": 4.99e-05, | |
| "loss": 0.7323, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6874033339061695, | |
| "eval_loss": 0.630436360836029, | |
| "eval_runtime": 155.009, | |
| "eval_samples_per_second": 7.903, | |
| "eval_steps_per_second": 0.993, | |
| "eval_wer": 0.711954217888936, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7045884172538237, | |
| "grad_norm": 0.7166395783424377, | |
| "learning_rate": 4.99866651205937e-05, | |
| "loss": 0.705, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.7217735006014779, | |
| "grad_norm": 0.7382345199584961, | |
| "learning_rate": 4.9972170686456406e-05, | |
| "loss": 0.7207, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7389585839491322, | |
| "grad_norm": 0.6300435066223145, | |
| "learning_rate": 4.995767625231911e-05, | |
| "loss": 0.7097, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.7561436672967864, | |
| "grad_norm": 0.7419637441635132, | |
| "learning_rate": 4.994318181818182e-05, | |
| "loss": 0.6865, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7733287506444406, | |
| "grad_norm": 0.5746726989746094, | |
| "learning_rate": 4.9928687384044535e-05, | |
| "loss": 0.6694, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7905138339920948, | |
| "grad_norm": 0.7999011874198914, | |
| "learning_rate": 4.991419294990724e-05, | |
| "loss": 0.6729, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8076989173397491, | |
| "grad_norm": 0.5744255781173706, | |
| "learning_rate": 4.989969851576995e-05, | |
| "loss": 0.6253, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.8248840006874033, | |
| "grad_norm": 0.8337430357933044, | |
| "learning_rate": 4.988520408163265e-05, | |
| "loss": 0.6616, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8420690840350575, | |
| "grad_norm": 0.5431417226791382, | |
| "learning_rate": 4.9870709647495364e-05, | |
| "loss": 0.6129, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.8592541673827118, | |
| "grad_norm": 0.8542420864105225, | |
| "learning_rate": 4.985621521335807e-05, | |
| "loss": 0.6252, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.876439250730366, | |
| "grad_norm": 0.5819442272186279, | |
| "learning_rate": 4.984172077922078e-05, | |
| "loss": 0.5723, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8936243340780203, | |
| "grad_norm": 0.9629399180412292, | |
| "learning_rate": 4.982722634508349e-05, | |
| "loss": 0.582, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9108094174256746, | |
| "grad_norm": 0.5698046088218689, | |
| "learning_rate": 4.98127319109462e-05, | |
| "loss": 0.5745, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.9279945007733288, | |
| "grad_norm": 0.893267035484314, | |
| "learning_rate": 4.979823747680891e-05, | |
| "loss": 0.5887, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.945179584120983, | |
| "grad_norm": 0.4911533296108246, | |
| "learning_rate": 4.9783743042671616e-05, | |
| "loss": 0.5657, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.9623646674686372, | |
| "grad_norm": 0.9226717948913574, | |
| "learning_rate": 4.976924860853432e-05, | |
| "loss": 0.5615, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9795497508162915, | |
| "grad_norm": 0.5409913659095764, | |
| "learning_rate": 4.975475417439703e-05, | |
| "loss": 0.5562, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9967348341639457, | |
| "grad_norm": 0.7855440974235535, | |
| "learning_rate": 4.9740259740259745e-05, | |
| "loss": 0.5641, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0144354700120295, | |
| "grad_norm": 0.48342418670654297, | |
| "learning_rate": 4.972576530612245e-05, | |
| "loss": 0.5216, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.0316205533596838, | |
| "grad_norm": 0.7128458023071289, | |
| "learning_rate": 4.971127087198516e-05, | |
| "loss": 0.5416, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0316205533596838, | |
| "eval_loss": 0.47852450609207153, | |
| "eval_runtime": 156.3389, | |
| "eval_samples_per_second": 7.836, | |
| "eval_steps_per_second": 0.985, | |
| "eval_wer": 0.6490568037303942, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.048805636707338, | |
| "grad_norm": 0.4331744909286499, | |
| "learning_rate": 4.969677643784787e-05, | |
| "loss": 0.5337, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.0659907200549923, | |
| "grad_norm": 0.7347924113273621, | |
| "learning_rate": 4.968228200371058e-05, | |
| "loss": 0.5115, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.0831758034026464, | |
| "grad_norm": 0.47687920928001404, | |
| "learning_rate": 4.966778756957329e-05, | |
| "loss": 0.5525, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.1003608867503007, | |
| "grad_norm": 0.6743185520172119, | |
| "learning_rate": 4.9653293135435996e-05, | |
| "loss": 0.4931, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.117545970097955, | |
| "grad_norm": 0.5449199676513672, | |
| "learning_rate": 4.9638798701298704e-05, | |
| "loss": 0.4773, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.1347310534456092, | |
| "grad_norm": 0.6370054483413696, | |
| "learning_rate": 4.962430426716141e-05, | |
| "loss": 0.4937, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.1519161367932633, | |
| "grad_norm": 0.5630868077278137, | |
| "learning_rate": 4.9609809833024125e-05, | |
| "loss": 0.541, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.1691012201409177, | |
| "grad_norm": 0.6498411297798157, | |
| "learning_rate": 4.959531539888683e-05, | |
| "loss": 0.5057, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.186286303488572, | |
| "grad_norm": 0.468143105506897, | |
| "learning_rate": 4.9580820964749533e-05, | |
| "loss": 0.5155, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.2034713868362261, | |
| "grad_norm": 0.7214887738227844, | |
| "learning_rate": 4.956632653061225e-05, | |
| "loss": 0.5132, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.2206564701838805, | |
| "grad_norm": 0.4510962963104248, | |
| "learning_rate": 4.9551832096474955e-05, | |
| "loss": 0.4643, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.2378415535315346, | |
| "grad_norm": 0.6587559580802917, | |
| "learning_rate": 4.953733766233766e-05, | |
| "loss": 0.4701, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.255026636879189, | |
| "grad_norm": 0.5024493336677551, | |
| "learning_rate": 4.952284322820037e-05, | |
| "loss": 0.5198, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 1.272211720226843, | |
| "grad_norm": 0.7172912359237671, | |
| "learning_rate": 4.950834879406308e-05, | |
| "loss": 0.4555, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.2893968035744974, | |
| "grad_norm": 0.5447876453399658, | |
| "learning_rate": 4.949385435992579e-05, | |
| "loss": 0.5041, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 1.3065818869221515, | |
| "grad_norm": 0.6178200244903564, | |
| "learning_rate": 4.94793599257885e-05, | |
| "loss": 0.4686, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3237669702698058, | |
| "grad_norm": 0.49054816365242004, | |
| "learning_rate": 4.9464865491651207e-05, | |
| "loss": 0.4803, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 1.34095205361746, | |
| "grad_norm": 0.6802988648414612, | |
| "learning_rate": 4.9450371057513914e-05, | |
| "loss": 0.4644, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.3581371369651143, | |
| "grad_norm": 0.41138285398483276, | |
| "learning_rate": 4.943587662337663e-05, | |
| "loss": 0.513, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 1.3753222203127686, | |
| "grad_norm": 0.8374213576316833, | |
| "learning_rate": 4.9421382189239336e-05, | |
| "loss": 0.4479, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3753222203127686, | |
| "eval_loss": 0.42023956775665283, | |
| "eval_runtime": 156.1936, | |
| "eval_samples_per_second": 7.843, | |
| "eval_steps_per_second": 0.986, | |
| "eval_wer": 0.6206549385332768, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3925073036604227, | |
| "grad_norm": 0.7108762860298157, | |
| "learning_rate": 4.940688775510204e-05, | |
| "loss": 0.465, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 1.4096923870080769, | |
| "grad_norm": 0.6862374544143677, | |
| "learning_rate": 4.939239332096475e-05, | |
| "loss": 0.4513, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.4268774703557312, | |
| "grad_norm": 0.5275014638900757, | |
| "learning_rate": 4.9377898886827465e-05, | |
| "loss": 0.5135, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 1.4440625537033855, | |
| "grad_norm": 0.7356075644493103, | |
| "learning_rate": 4.936340445269017e-05, | |
| "loss": 0.4843, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.4612476370510397, | |
| "grad_norm": 0.46985840797424316, | |
| "learning_rate": 4.934891001855288e-05, | |
| "loss": 0.544, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 1.478432720398694, | |
| "grad_norm": 0.8410568237304688, | |
| "learning_rate": 4.933441558441559e-05, | |
| "loss": 0.453, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.4956178037463481, | |
| "grad_norm": 0.5153818130493164, | |
| "learning_rate": 4.9319921150278294e-05, | |
| "loss": 0.5145, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 1.5128028870940025, | |
| "grad_norm": 0.8318967223167419, | |
| "learning_rate": 4.930542671614101e-05, | |
| "loss": 0.4583, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.5299879704416566, | |
| "grad_norm": 0.5471023321151733, | |
| "learning_rate": 4.9290932282003716e-05, | |
| "loss": 0.4622, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 1.547173053789311, | |
| "grad_norm": 0.6953750848770142, | |
| "learning_rate": 4.927643784786642e-05, | |
| "loss": 0.4712, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.5643581371369653, | |
| "grad_norm": 0.5455946326255798, | |
| "learning_rate": 4.926194341372913e-05, | |
| "loss": 0.4702, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.5815432204846194, | |
| "grad_norm": 0.6755653023719788, | |
| "learning_rate": 4.924744897959184e-05, | |
| "loss": 0.4356, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.5987283038322735, | |
| "grad_norm": 0.47134584188461304, | |
| "learning_rate": 4.9232954545454546e-05, | |
| "loss": 0.4333, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.6159133871799278, | |
| "grad_norm": 0.6077346205711365, | |
| "learning_rate": 4.921846011131725e-05, | |
| "loss": 0.4432, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.6330984705275822, | |
| "grad_norm": 0.5316899418830872, | |
| "learning_rate": 4.920396567717996e-05, | |
| "loss": 0.45, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.6502835538752363, | |
| "grad_norm": 0.79489666223526, | |
| "learning_rate": 4.9189471243042675e-05, | |
| "loss": 0.4637, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6674686372228904, | |
| "grad_norm": 0.5166532397270203, | |
| "learning_rate": 4.917497680890538e-05, | |
| "loss": 0.4375, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.6846537205705447, | |
| "grad_norm": 0.6109660863876343, | |
| "learning_rate": 4.916048237476809e-05, | |
| "loss": 0.418, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.701838803918199, | |
| "grad_norm": 0.6634232401847839, | |
| "learning_rate": 4.91459879406308e-05, | |
| "loss": 0.4315, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.7190238872658532, | |
| "grad_norm": 0.5985594987869263, | |
| "learning_rate": 4.913149350649351e-05, | |
| "loss": 0.4541, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.7190238872658532, | |
| "eval_loss": 0.3850683569908142, | |
| "eval_runtime": 155.7689, | |
| "eval_samples_per_second": 7.864, | |
| "eval_steps_per_second": 0.989, | |
| "eval_wer": 0.6006252649427724, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.7362089706135073, | |
| "grad_norm": 0.5531134605407715, | |
| "learning_rate": 4.911699907235622e-05, | |
| "loss": 0.4558, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.7533940539611617, | |
| "grad_norm": 0.594007670879364, | |
| "learning_rate": 4.9102504638218926e-05, | |
| "loss": 0.4422, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.770579137308816, | |
| "grad_norm": 0.4865548312664032, | |
| "learning_rate": 4.9088010204081634e-05, | |
| "loss": 0.4216, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.7877642206564701, | |
| "grad_norm": 0.5752180814743042, | |
| "learning_rate": 4.907351576994435e-05, | |
| "loss": 0.429, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.8049493040041245, | |
| "grad_norm": 0.4255332350730896, | |
| "learning_rate": 4.9059021335807055e-05, | |
| "loss": 0.4074, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.8221343873517788, | |
| "grad_norm": 0.6160002946853638, | |
| "learning_rate": 4.904452690166976e-05, | |
| "loss": 0.4388, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.839319470699433, | |
| "grad_norm": 0.49583593010902405, | |
| "learning_rate": 4.903003246753247e-05, | |
| "loss": 0.4211, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.856504554047087, | |
| "grad_norm": 0.7490856647491455, | |
| "learning_rate": 4.901553803339518e-05, | |
| "loss": 0.4559, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.8736896373947414, | |
| "grad_norm": 0.4634897708892822, | |
| "learning_rate": 4.900104359925789e-05, | |
| "loss": 0.4323, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.8908747207423957, | |
| "grad_norm": 1.0682307481765747, | |
| "learning_rate": 4.89865491651206e-05, | |
| "loss": 0.4361, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.9080598040900498, | |
| "grad_norm": 0.41067609190940857, | |
| "learning_rate": 4.89720547309833e-05, | |
| "loss": 0.4627, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.925244887437704, | |
| "grad_norm": 0.674846887588501, | |
| "learning_rate": 4.8957560296846014e-05, | |
| "loss": 0.4357, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.9424299707853583, | |
| "grad_norm": 0.4671356678009033, | |
| "learning_rate": 4.894306586270872e-05, | |
| "loss": 0.429, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.9596150541330126, | |
| "grad_norm": 0.6439480781555176, | |
| "learning_rate": 4.892857142857143e-05, | |
| "loss": 0.3965, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.9768001374806667, | |
| "grad_norm": 0.5034067034721375, | |
| "learning_rate": 4.8914076994434137e-05, | |
| "loss": 0.4131, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.9939852208283209, | |
| "grad_norm": 0.67892986536026, | |
| "learning_rate": 4.8899582560296844e-05, | |
| "loss": 0.4306, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.011685856676405, | |
| "grad_norm": 0.4826701879501343, | |
| "learning_rate": 4.888508812615956e-05, | |
| "loss": 0.4437, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 2.028870940024059, | |
| "grad_norm": 0.5658535361289978, | |
| "learning_rate": 4.8870593692022266e-05, | |
| "loss": 0.3842, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.0460560233717136, | |
| "grad_norm": 0.5490546226501465, | |
| "learning_rate": 4.885609925788497e-05, | |
| "loss": 0.3872, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 2.0632411067193677, | |
| "grad_norm": 0.59776371717453, | |
| "learning_rate": 4.884160482374768e-05, | |
| "loss": 0.365, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.0632411067193677, | |
| "eval_loss": 0.37011492252349854, | |
| "eval_runtime": 150.8939, | |
| "eval_samples_per_second": 8.118, | |
| "eval_steps_per_second": 1.021, | |
| "eval_wer": 0.588543874523103, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.080426190067022, | |
| "grad_norm": 0.4145926237106323, | |
| "learning_rate": 4.8827110389610395e-05, | |
| "loss": 0.4335, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 2.097611273414676, | |
| "grad_norm": 0.6833218336105347, | |
| "learning_rate": 4.88126159554731e-05, | |
| "loss": 0.3801, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.1147963567623305, | |
| "grad_norm": 0.5245340466499329, | |
| "learning_rate": 4.879812152133581e-05, | |
| "loss": 0.4167, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 2.1319814401099846, | |
| "grad_norm": 0.5739388465881348, | |
| "learning_rate": 4.878362708719852e-05, | |
| "loss": 0.3793, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.1491665234576387, | |
| "grad_norm": 0.502185046672821, | |
| "learning_rate": 4.876913265306123e-05, | |
| "loss": 0.4444, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 2.166351606805293, | |
| "grad_norm": 0.635421633720398, | |
| "learning_rate": 4.875463821892394e-05, | |
| "loss": 0.3668, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.1835366901529474, | |
| "grad_norm": 0.4521035850048065, | |
| "learning_rate": 4.8740143784786646e-05, | |
| "loss": 0.3772, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 2.2007217735006015, | |
| "grad_norm": 0.7126047015190125, | |
| "learning_rate": 4.8725649350649354e-05, | |
| "loss": 0.374, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.2179068568482556, | |
| "grad_norm": 0.44621542096138, | |
| "learning_rate": 4.871115491651206e-05, | |
| "loss": 0.448, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 2.23509194019591, | |
| "grad_norm": 0.6418918967247009, | |
| "learning_rate": 4.8696660482374775e-05, | |
| "loss": 0.4099, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.2522770235435643, | |
| "grad_norm": 0.6630382537841797, | |
| "learning_rate": 4.8682166048237476e-05, | |
| "loss": 0.4028, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 2.2694621068912184, | |
| "grad_norm": 0.6084064841270447, | |
| "learning_rate": 4.866767161410018e-05, | |
| "loss": 0.3737, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.2866471902388725, | |
| "grad_norm": 0.49679034948349, | |
| "learning_rate": 4.86531771799629e-05, | |
| "loss": 0.4054, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 2.3038322735865266, | |
| "grad_norm": 0.5616837739944458, | |
| "learning_rate": 4.8638682745825605e-05, | |
| "loss": 0.3562, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.321017356934181, | |
| "grad_norm": 0.501042366027832, | |
| "learning_rate": 4.862418831168831e-05, | |
| "loss": 0.3613, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 2.3382024402818353, | |
| "grad_norm": 0.7408941984176636, | |
| "learning_rate": 4.860969387755102e-05, | |
| "loss": 0.3774, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.3553875236294894, | |
| "grad_norm": 0.4748440086841583, | |
| "learning_rate": 4.859519944341373e-05, | |
| "loss": 0.4066, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 2.372572606977144, | |
| "grad_norm": 0.6289178729057312, | |
| "learning_rate": 4.858070500927644e-05, | |
| "loss": 0.3748, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.389757690324798, | |
| "grad_norm": 0.4348820149898529, | |
| "learning_rate": 4.856621057513915e-05, | |
| "loss": 0.4605, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 2.4069427736724522, | |
| "grad_norm": 0.5051092505455017, | |
| "learning_rate": 4.8551716141001856e-05, | |
| "loss": 0.3433, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.4069427736724522, | |
| "eval_loss": 0.364750474691391, | |
| "eval_runtime": 151.4529, | |
| "eval_samples_per_second": 8.088, | |
| "eval_steps_per_second": 1.017, | |
| "eval_wer": 0.5796947859262399, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.4241278570201064, | |
| "grad_norm": 0.5361665487289429, | |
| "learning_rate": 4.8537221706864564e-05, | |
| "loss": 0.4373, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 2.441312940367761, | |
| "grad_norm": 0.5831518769264221, | |
| "learning_rate": 4.852272727272728e-05, | |
| "loss": 0.4052, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.458498023715415, | |
| "grad_norm": 0.42250296473503113, | |
| "learning_rate": 4.8508232838589985e-05, | |
| "loss": 0.3786, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 2.475683107063069, | |
| "grad_norm": 0.6438080072402954, | |
| "learning_rate": 4.849373840445269e-05, | |
| "loss": 0.3529, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4928681904107233, | |
| "grad_norm": 0.41823628544807434, | |
| "learning_rate": 4.84792439703154e-05, | |
| "loss": 0.4122, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 2.510053273758378, | |
| "grad_norm": 0.6400181651115417, | |
| "learning_rate": 4.8464749536178115e-05, | |
| "loss": 0.3602, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.527238357106032, | |
| "grad_norm": 0.40071290731430054, | |
| "learning_rate": 4.845025510204082e-05, | |
| "loss": 0.3968, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 2.544423440453686, | |
| "grad_norm": 1.0052437782287598, | |
| "learning_rate": 4.843576066790353e-05, | |
| "loss": 0.3927, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.5616085238013406, | |
| "grad_norm": 0.43629390001296997, | |
| "learning_rate": 4.842126623376624e-05, | |
| "loss": 0.3858, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 2.5787936071489947, | |
| "grad_norm": 0.8912670016288757, | |
| "learning_rate": 4.8406771799628944e-05, | |
| "loss": 0.3688, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.595978690496649, | |
| "grad_norm": 0.4484070837497711, | |
| "learning_rate": 4.839227736549166e-05, | |
| "loss": 0.4143, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 2.613163773844303, | |
| "grad_norm": 0.6882378458976746, | |
| "learning_rate": 4.837778293135436e-05, | |
| "loss": 0.3575, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.6303488571919575, | |
| "grad_norm": 0.4403606653213501, | |
| "learning_rate": 4.8363288497217067e-05, | |
| "loss": 0.4099, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 2.6475339405396117, | |
| "grad_norm": 0.7600814700126648, | |
| "learning_rate": 4.834879406307978e-05, | |
| "loss": 0.3732, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.664719023887266, | |
| "grad_norm": 0.3970819115638733, | |
| "learning_rate": 4.833429962894249e-05, | |
| "loss": 0.3994, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 2.68190410723492, | |
| "grad_norm": 0.8571271300315857, | |
| "learning_rate": 4.8319805194805196e-05, | |
| "loss": 0.3354, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.6990891905825745, | |
| "grad_norm": 0.514602780342102, | |
| "learning_rate": 4.83053107606679e-05, | |
| "loss": 0.4109, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 2.7162742739302286, | |
| "grad_norm": 0.5113664269447327, | |
| "learning_rate": 4.829081632653061e-05, | |
| "loss": 0.3742, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.7334593572778827, | |
| "grad_norm": 0.44583624601364136, | |
| "learning_rate": 4.8276321892393325e-05, | |
| "loss": 0.3896, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 2.7506444406255373, | |
| "grad_norm": 0.5685079097747803, | |
| "learning_rate": 4.826182745825603e-05, | |
| "loss": 0.3561, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7506444406255373, | |
| "eval_loss": 0.3437730669975281, | |
| "eval_runtime": 152.7484, | |
| "eval_samples_per_second": 8.02, | |
| "eval_steps_per_second": 1.008, | |
| "eval_wer": 0.5716405256464604, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7678295239731914, | |
| "grad_norm": 0.48729953169822693, | |
| "learning_rate": 4.824733302411874e-05, | |
| "loss": 0.3752, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 2.7850146073208455, | |
| "grad_norm": 0.64439457654953, | |
| "learning_rate": 4.823283858998145e-05, | |
| "loss": 0.3513, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.8021996906684996, | |
| "grad_norm": 0.4153486490249634, | |
| "learning_rate": 4.821834415584416e-05, | |
| "loss": 0.3979, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 2.8193847740161537, | |
| "grad_norm": 0.5988856554031372, | |
| "learning_rate": 4.820384972170687e-05, | |
| "loss": 0.3825, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.8365698573638083, | |
| "grad_norm": 0.8136705160140991, | |
| "learning_rate": 4.8189355287569576e-05, | |
| "loss": 0.4321, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 2.8537549407114624, | |
| "grad_norm": 0.6228049993515015, | |
| "learning_rate": 4.8174860853432284e-05, | |
| "loss": 0.3569, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.8709400240591165, | |
| "grad_norm": 0.5307970643043518, | |
| "learning_rate": 4.816036641929499e-05, | |
| "loss": 0.3775, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 2.888125107406771, | |
| "grad_norm": 0.5696175694465637, | |
| "learning_rate": 4.8145871985157705e-05, | |
| "loss": 0.3306, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.905310190754425, | |
| "grad_norm": 0.46450933814048767, | |
| "learning_rate": 4.813137755102041e-05, | |
| "loss": 0.4307, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 2.9224952741020793, | |
| "grad_norm": 0.6127625703811646, | |
| "learning_rate": 4.811688311688312e-05, | |
| "loss": 0.3604, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.939680357449734, | |
| "grad_norm": 0.5017271637916565, | |
| "learning_rate": 4.810238868274583e-05, | |
| "loss": 0.3721, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 2.956865440797388, | |
| "grad_norm": 0.6709030866622925, | |
| "learning_rate": 4.808789424860854e-05, | |
| "loss": 0.3603, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.974050524145042, | |
| "grad_norm": 0.40175503492355347, | |
| "learning_rate": 4.807339981447124e-05, | |
| "loss": 0.376, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 2.9912356074926962, | |
| "grad_norm": 0.6254987716674805, | |
| "learning_rate": 4.805890538033395e-05, | |
| "loss": 0.3316, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 3.0089362433407802, | |
| "grad_norm": 0.4055463373661041, | |
| "learning_rate": 4.804441094619666e-05, | |
| "loss": 0.4126, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 3.0261213266884344, | |
| "grad_norm": 0.5347069501876831, | |
| "learning_rate": 4.802991651205937e-05, | |
| "loss": 0.3477, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 3.0433064100360885, | |
| "grad_norm": 0.4559672474861145, | |
| "learning_rate": 4.801542207792208e-05, | |
| "loss": 0.3458, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 3.060491493383743, | |
| "grad_norm": 0.46173930168151855, | |
| "learning_rate": 4.8000927643784786e-05, | |
| "loss": 0.3316, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.077676576731397, | |
| "grad_norm": 0.414719820022583, | |
| "learning_rate": 4.7986433209647494e-05, | |
| "loss": 0.3719, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 3.0948616600790513, | |
| "grad_norm": 0.7085908055305481, | |
| "learning_rate": 4.797193877551021e-05, | |
| "loss": 0.3237, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.0948616600790513, | |
| "eval_loss": 0.3646816313266754, | |
| "eval_runtime": 153.9661, | |
| "eval_samples_per_second": 7.956, | |
| "eval_steps_per_second": 1.0, | |
| "eval_wer": 0.5677193726155151, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.1120467434267054, | |
| "grad_norm": 0.4840669631958008, | |
| "learning_rate": 4.7957444341372916e-05, | |
| "loss": 0.3515, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 3.12923182677436, | |
| "grad_norm": 0.6030757427215576, | |
| "learning_rate": 4.794294990723562e-05, | |
| "loss": 0.3263, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.146416910122014, | |
| "grad_norm": 0.5091059803962708, | |
| "learning_rate": 4.792845547309833e-05, | |
| "loss": 0.3315, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 3.163601993469668, | |
| "grad_norm": 0.7523996829986572, | |
| "learning_rate": 4.7913961038961045e-05, | |
| "loss": 0.3368, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.1807870768173228, | |
| "grad_norm": 1.0035797357559204, | |
| "learning_rate": 4.789946660482375e-05, | |
| "loss": 0.3931, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 3.197972160164977, | |
| "grad_norm": 0.5936137437820435, | |
| "learning_rate": 4.788497217068646e-05, | |
| "loss": 0.2896, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.215157243512631, | |
| "grad_norm": 0.5628079771995544, | |
| "learning_rate": 4.787047773654917e-05, | |
| "loss": 0.3632, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 3.232342326860285, | |
| "grad_norm": 0.5175526738166809, | |
| "learning_rate": 4.7855983302411874e-05, | |
| "loss": 0.33, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.2495274102079397, | |
| "grad_norm": 0.36029067635536194, | |
| "learning_rate": 4.784148886827459e-05, | |
| "loss": 0.3487, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 3.266712493555594, | |
| "grad_norm": 0.6295140981674194, | |
| "learning_rate": 4.7826994434137296e-05, | |
| "loss": 0.3391, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.283897576903248, | |
| "grad_norm": 0.535555362701416, | |
| "learning_rate": 4.7812500000000003e-05, | |
| "loss": 0.3804, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 3.301082660250902, | |
| "grad_norm": 1.0075314044952393, | |
| "learning_rate": 4.779800556586271e-05, | |
| "loss": 0.3338, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.3182677435985566, | |
| "grad_norm": 0.3681296110153198, | |
| "learning_rate": 4.7783511131725425e-05, | |
| "loss": 0.3727, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 3.3354528269462107, | |
| "grad_norm": 0.5938307642936707, | |
| "learning_rate": 4.7769016697588126e-05, | |
| "loss": 0.3565, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.352637910293865, | |
| "grad_norm": 0.4364496171474457, | |
| "learning_rate": 4.775452226345083e-05, | |
| "loss": 0.3459, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 3.3698229936415194, | |
| "grad_norm": 0.5793933272361755, | |
| "learning_rate": 4.774002782931354e-05, | |
| "loss": 0.3653, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.3870080769891735, | |
| "grad_norm": 0.41033703088760376, | |
| "learning_rate": 4.7725533395176255e-05, | |
| "loss": 0.3629, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 3.4041931603368276, | |
| "grad_norm": 0.6783180236816406, | |
| "learning_rate": 4.771103896103896e-05, | |
| "loss": 0.3258, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.4213782436844817, | |
| "grad_norm": 0.4356047511100769, | |
| "learning_rate": 4.769654452690167e-05, | |
| "loss": 0.3423, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 3.4385633270321363, | |
| "grad_norm": 0.645196795463562, | |
| "learning_rate": 4.768205009276438e-05, | |
| "loss": 0.322, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.4385633270321363, | |
| "eval_loss": 0.3426838219165802, | |
| "eval_runtime": 154.5652, | |
| "eval_samples_per_second": 7.925, | |
| "eval_steps_per_second": 0.996, | |
| "eval_wer": 0.5637982195845698, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.4557484103797904, | |
| "grad_norm": 0.7585852742195129, | |
| "learning_rate": 4.766755565862709e-05, | |
| "loss": 0.3615, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 3.4729334937274445, | |
| "grad_norm": 0.6891105771064758, | |
| "learning_rate": 4.76530612244898e-05, | |
| "loss": 0.3016, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.4901185770750986, | |
| "grad_norm": 0.40171509981155396, | |
| "learning_rate": 4.7638566790352506e-05, | |
| "loss": 0.3389, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 3.507303660422753, | |
| "grad_norm": 0.6688668131828308, | |
| "learning_rate": 4.7624072356215214e-05, | |
| "loss": 0.3196, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.5244887437704073, | |
| "grad_norm": 0.7812600135803223, | |
| "learning_rate": 4.760957792207793e-05, | |
| "loss": 0.3795, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 3.5416738271180614, | |
| "grad_norm": 0.6127042174339294, | |
| "learning_rate": 4.7595083487940635e-05, | |
| "loss": 0.3422, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.558858910465716, | |
| "grad_norm": 0.4893074333667755, | |
| "learning_rate": 4.758058905380334e-05, | |
| "loss": 0.3759, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 3.57604399381337, | |
| "grad_norm": 0.7298163175582886, | |
| "learning_rate": 4.756609461966605e-05, | |
| "loss": 0.3388, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.5932290771610242, | |
| "grad_norm": 0.5726421475410461, | |
| "learning_rate": 4.755160018552876e-05, | |
| "loss": 0.4172, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 3.6104141605086784, | |
| "grad_norm": 0.5771546959877014, | |
| "learning_rate": 4.753710575139147e-05, | |
| "loss": 0.3217, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.6275992438563325, | |
| "grad_norm": 0.48307299613952637, | |
| "learning_rate": 4.752261131725418e-05, | |
| "loss": 0.3864, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 3.644784327203987, | |
| "grad_norm": 0.5440219044685364, | |
| "learning_rate": 4.750811688311689e-05, | |
| "loss": 0.3288, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.661969410551641, | |
| "grad_norm": 0.4851985275745392, | |
| "learning_rate": 4.7493622448979594e-05, | |
| "loss": 0.4105, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 3.6791544938992953, | |
| "grad_norm": 0.537399172782898, | |
| "learning_rate": 4.74791280148423e-05, | |
| "loss": 0.3309, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.69633957724695, | |
| "grad_norm": 0.4729978144168854, | |
| "learning_rate": 4.746463358070501e-05, | |
| "loss": 0.3604, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 3.713524660594604, | |
| "grad_norm": 0.8028717041015625, | |
| "learning_rate": 4.7450139146567716e-05, | |
| "loss": 0.2908, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.730709743942258, | |
| "grad_norm": 0.40672922134399414, | |
| "learning_rate": 4.7435644712430424e-05, | |
| "loss": 0.3871, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 3.747894827289912, | |
| "grad_norm": 0.6138872504234314, | |
| "learning_rate": 4.742115027829314e-05, | |
| "loss": 0.3166, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.7650799106375668, | |
| "grad_norm": 0.4270385801792145, | |
| "learning_rate": 4.7406655844155846e-05, | |
| "loss": 0.3449, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 3.782264993985221, | |
| "grad_norm": 0.535121500492096, | |
| "learning_rate": 4.739216141001855e-05, | |
| "loss": 0.2921, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.782264993985221, | |
| "eval_loss": 0.3344533145427704, | |
| "eval_runtime": 150.3102, | |
| "eval_samples_per_second": 8.15, | |
| "eval_steps_per_second": 1.025, | |
| "eval_wer": 0.5604069520983468, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.799450077332875, | |
| "grad_norm": 0.3871770203113556, | |
| "learning_rate": 4.737766697588126e-05, | |
| "loss": 0.349, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 3.816635160680529, | |
| "grad_norm": 0.503182053565979, | |
| "learning_rate": 4.7363172541743975e-05, | |
| "loss": 0.3707, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.8338202440281837, | |
| "grad_norm": 0.4528012275695801, | |
| "learning_rate": 4.734867810760668e-05, | |
| "loss": 0.359, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 3.851005327375838, | |
| "grad_norm": 0.630174458026886, | |
| "learning_rate": 4.733418367346939e-05, | |
| "loss": 0.3558, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.868190410723492, | |
| "grad_norm": 0.4319029450416565, | |
| "learning_rate": 4.73196892393321e-05, | |
| "loss": 0.3812, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 3.8853754940711465, | |
| "grad_norm": 0.5308706760406494, | |
| "learning_rate": 4.730519480519481e-05, | |
| "loss": 0.2885, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.9025605774188006, | |
| "grad_norm": 0.4054734408855438, | |
| "learning_rate": 4.729070037105752e-05, | |
| "loss": 0.363, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 3.9197456607664547, | |
| "grad_norm": 0.8012121319770813, | |
| "learning_rate": 4.7276205936920226e-05, | |
| "loss": 0.3398, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.936930744114109, | |
| "grad_norm": 0.4499848783016205, | |
| "learning_rate": 4.7261711502782933e-05, | |
| "loss": 0.3485, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 3.954115827461763, | |
| "grad_norm": 0.5845701098442078, | |
| "learning_rate": 4.724721706864564e-05, | |
| "loss": 0.3188, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.9713009108094175, | |
| "grad_norm": 0.4100358486175537, | |
| "learning_rate": 4.7232722634508355e-05, | |
| "loss": 0.3479, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 3.9884859941570716, | |
| "grad_norm": 0.42875462770462036, | |
| "learning_rate": 4.721822820037106e-05, | |
| "loss": 0.3043, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 4.006186630005155, | |
| "grad_norm": 0.4040299355983734, | |
| "learning_rate": 4.720373376623377e-05, | |
| "loss": 0.4045, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 4.02337171335281, | |
| "grad_norm": 0.4601922035217285, | |
| "learning_rate": 4.718923933209648e-05, | |
| "loss": 0.3044, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 4.040556796700464, | |
| "grad_norm": 0.5125726461410522, | |
| "learning_rate": 4.7174744897959185e-05, | |
| "loss": 0.342, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 4.057741880048118, | |
| "grad_norm": 0.5830023884773254, | |
| "learning_rate": 4.716025046382189e-05, | |
| "loss": 0.2916, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 4.0749269633957725, | |
| "grad_norm": 0.3486079275608063, | |
| "learning_rate": 4.71457560296846e-05, | |
| "loss": 0.3215, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 4.092112046743427, | |
| "grad_norm": 0.5681314468383789, | |
| "learning_rate": 4.713126159554731e-05, | |
| "loss": 0.3049, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 4.109297130091081, | |
| "grad_norm": 0.3579752743244171, | |
| "learning_rate": 4.711676716141002e-05, | |
| "loss": 0.3167, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 4.126482213438735, | |
| "grad_norm": 0.8702667355537415, | |
| "learning_rate": 4.710227272727273e-05, | |
| "loss": 0.3037, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.126482213438735, | |
| "eval_loss": 0.335175484418869, | |
| "eval_runtime": 150.3279, | |
| "eval_samples_per_second": 8.149, | |
| "eval_steps_per_second": 1.024, | |
| "eval_wer": 0.5541013141161509, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.143667296786389, | |
| "grad_norm": 0.6025490760803223, | |
| "learning_rate": 4.7087778293135436e-05, | |
| "loss": 0.3527, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 4.160852380134044, | |
| "grad_norm": 0.46763402223587036, | |
| "learning_rate": 4.7073283858998144e-05, | |
| "loss": 0.3182, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 4.178037463481698, | |
| "grad_norm": 0.38680383563041687, | |
| "learning_rate": 4.705878942486086e-05, | |
| "loss": 0.3234, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 4.195222546829352, | |
| "grad_norm": 0.45606276392936707, | |
| "learning_rate": 4.7044294990723565e-05, | |
| "loss": 0.3418, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 4.212407630177006, | |
| "grad_norm": 0.5080279111862183, | |
| "learning_rate": 4.702980055658627e-05, | |
| "loss": 0.3402, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 4.229592713524661, | |
| "grad_norm": 0.5734138488769531, | |
| "learning_rate": 4.701530612244898e-05, | |
| "loss": 0.3026, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 4.246777796872315, | |
| "grad_norm": 0.34839344024658203, | |
| "learning_rate": 4.7000811688311694e-05, | |
| "loss": 0.3422, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 4.263962880219969, | |
| "grad_norm": 0.5648381114006042, | |
| "learning_rate": 4.69863172541744e-05, | |
| "loss": 0.3075, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 4.281147963567623, | |
| "grad_norm": 0.5454294681549072, | |
| "learning_rate": 4.697182282003711e-05, | |
| "loss": 0.3528, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 4.298333046915277, | |
| "grad_norm": 0.5028226375579834, | |
| "learning_rate": 4.695732838589982e-05, | |
| "loss": 0.2952, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 4.315518130262932, | |
| "grad_norm": 0.45058056712150574, | |
| "learning_rate": 4.6942833951762524e-05, | |
| "loss": 0.3535, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 4.332703213610586, | |
| "grad_norm": 0.6654832363128662, | |
| "learning_rate": 4.692833951762524e-05, | |
| "loss": 0.3127, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 4.34988829695824, | |
| "grad_norm": 0.49009886384010315, | |
| "learning_rate": 4.6913845083487946e-05, | |
| "loss": 0.3419, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 4.367073380305895, | |
| "grad_norm": 0.5751463770866394, | |
| "learning_rate": 4.689935064935065e-05, | |
| "loss": 0.3069, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 4.384258463653548, | |
| "grad_norm": 0.7767444849014282, | |
| "learning_rate": 4.688485621521336e-05, | |
| "loss": 0.3666, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 4.401443547001203, | |
| "grad_norm": 0.5131709575653076, | |
| "learning_rate": 4.687036178107607e-05, | |
| "loss": 0.329, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 4.4186286303488576, | |
| "grad_norm": 0.4997400939464569, | |
| "learning_rate": 4.6855867346938776e-05, | |
| "loss": 0.365, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 4.435813713696511, | |
| "grad_norm": 0.5275589227676392, | |
| "learning_rate": 4.684137291280148e-05, | |
| "loss": 0.31, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 4.452998797044166, | |
| "grad_norm": 0.43651729822158813, | |
| "learning_rate": 4.682687847866419e-05, | |
| "loss": 0.3253, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 4.47018388039182, | |
| "grad_norm": 0.49254560470581055, | |
| "learning_rate": 4.6812384044526905e-05, | |
| "loss": 0.2695, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.47018388039182, | |
| "eval_loss": 0.32015639543533325, | |
| "eval_runtime": 154.5447, | |
| "eval_samples_per_second": 7.927, | |
| "eval_steps_per_second": 0.996, | |
| "eval_wer": 0.5515048749470114, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 4.487368963739474, | |
| "grad_norm": 0.3705599904060364, | |
| "learning_rate": 4.679788961038961e-05, | |
| "loss": 0.3453, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 4.504554047087129, | |
| "grad_norm": 0.5254660844802856, | |
| "learning_rate": 4.678339517625232e-05, | |
| "loss": 0.282, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 4.521739130434782, | |
| "grad_norm": 0.37494751811027527, | |
| "learning_rate": 4.676890074211503e-05, | |
| "loss": 0.3491, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 4.538924213782437, | |
| "grad_norm": 0.5620461702346802, | |
| "learning_rate": 4.675440630797774e-05, | |
| "loss": 0.3074, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 4.556109297130091, | |
| "grad_norm": 0.8100690245628357, | |
| "learning_rate": 4.673991187384045e-05, | |
| "loss": 0.3366, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 4.573294380477745, | |
| "grad_norm": 0.7091922760009766, | |
| "learning_rate": 4.6725417439703156e-05, | |
| "loss": 0.2864, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 4.5904794638254, | |
| "grad_norm": 0.38283970952033997, | |
| "learning_rate": 4.6710923005565864e-05, | |
| "loss": 0.3512, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 4.607664547173053, | |
| "grad_norm": 0.5631033182144165, | |
| "learning_rate": 4.669642857142857e-05, | |
| "loss": 0.2884, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 4.624849630520708, | |
| "grad_norm": 0.3868861794471741, | |
| "learning_rate": 4.6681934137291285e-05, | |
| "loss": 0.3413, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 4.642034713868362, | |
| "grad_norm": 0.6378294229507446, | |
| "learning_rate": 4.666743970315399e-05, | |
| "loss": 0.2949, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 4.659219797216016, | |
| "grad_norm": 0.5135634541511536, | |
| "learning_rate": 4.66529452690167e-05, | |
| "loss": 0.3463, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 4.676404880563671, | |
| "grad_norm": 0.4989064633846283, | |
| "learning_rate": 4.663845083487941e-05, | |
| "loss": 0.3027, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 4.693589963911325, | |
| "grad_norm": 0.5633465647697449, | |
| "learning_rate": 4.662395640074212e-05, | |
| "loss": 0.3626, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 4.710775047258979, | |
| "grad_norm": 0.6484938859939575, | |
| "learning_rate": 4.660946196660483e-05, | |
| "loss": 0.2881, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 4.7279601306066334, | |
| "grad_norm": 0.4283730983734131, | |
| "learning_rate": 4.6594967532467537e-05, | |
| "loss": 0.3452, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 4.745145213954288, | |
| "grad_norm": 0.5287323594093323, | |
| "learning_rate": 4.6580473098330244e-05, | |
| "loss": 0.3015, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 4.762330297301942, | |
| "grad_norm": 0.41760608553886414, | |
| "learning_rate": 4.656597866419295e-05, | |
| "loss": 0.3032, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 4.779515380649596, | |
| "grad_norm": 0.6855202317237854, | |
| "learning_rate": 4.655148423005566e-05, | |
| "loss": 0.3006, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 4.796700463997251, | |
| "grad_norm": 0.45387232303619385, | |
| "learning_rate": 4.6536989795918366e-05, | |
| "loss": 0.3428, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 4.8138855473449045, | |
| "grad_norm": 0.8281689286231995, | |
| "learning_rate": 4.6522495361781074e-05, | |
| "loss": 0.2804, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.8138855473449045, | |
| "eval_loss": 0.3352712392807007, | |
| "eval_runtime": 156.292, | |
| "eval_samples_per_second": 7.838, | |
| "eval_steps_per_second": 0.985, | |
| "eval_wer": 0.5525116574819839, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 4.831070630692559, | |
| "grad_norm": 0.4252523183822632, | |
| "learning_rate": 4.650800092764379e-05, | |
| "loss": 0.3456, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 4.848255714040213, | |
| "grad_norm": 0.536359429359436, | |
| "learning_rate": 4.6493506493506495e-05, | |
| "loss": 0.3016, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 4.865440797387867, | |
| "grad_norm": 0.48082077503204346, | |
| "learning_rate": 4.64790120593692e-05, | |
| "loss": 0.3323, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 4.882625880735522, | |
| "grad_norm": 0.7152004837989807, | |
| "learning_rate": 4.646451762523191e-05, | |
| "loss": 0.3102, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 4.8998109640831755, | |
| "grad_norm": 0.43397244811058044, | |
| "learning_rate": 4.6450023191094625e-05, | |
| "loss": 0.3671, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 4.91699604743083, | |
| "grad_norm": 1.093762755393982, | |
| "learning_rate": 4.643552875695733e-05, | |
| "loss": 0.299, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 4.934181130778485, | |
| "grad_norm": 0.3550453782081604, | |
| "learning_rate": 4.642103432282004e-05, | |
| "loss": 0.3351, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 4.951366214126138, | |
| "grad_norm": 0.6337935924530029, | |
| "learning_rate": 4.640653988868275e-05, | |
| "loss": 0.2952, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 4.968551297473793, | |
| "grad_norm": 0.5126771926879883, | |
| "learning_rate": 4.6392045454545454e-05, | |
| "loss": 0.3431, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 4.9857363808214465, | |
| "grad_norm": 0.4208792448043823, | |
| "learning_rate": 4.637755102040817e-05, | |
| "loss": 0.3025, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 5.003437016669531, | |
| "grad_norm": 0.5154265761375427, | |
| "learning_rate": 4.6363056586270876e-05, | |
| "loss": 0.3215, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 5.020622100017185, | |
| "grad_norm": 0.4937199652194977, | |
| "learning_rate": 4.634856215213358e-05, | |
| "loss": 0.2898, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 5.03780718336484, | |
| "grad_norm": 0.4737917482852936, | |
| "learning_rate": 4.633406771799629e-05, | |
| "loss": 0.3186, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 5.054992266712493, | |
| "grad_norm": 0.39069080352783203, | |
| "learning_rate": 4.6319573283859005e-05, | |
| "loss": 0.299, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 5.072177350060148, | |
| "grad_norm": 0.41207846999168396, | |
| "learning_rate": 4.630507884972171e-05, | |
| "loss": 0.3261, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 5.089362433407802, | |
| "grad_norm": 0.5971049070358276, | |
| "learning_rate": 4.629058441558442e-05, | |
| "loss": 0.279, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 5.106547516755456, | |
| "grad_norm": 0.41475459933280945, | |
| "learning_rate": 4.627608998144712e-05, | |
| "loss": 0.3126, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 5.123732600103111, | |
| "grad_norm": 0.5062717795372009, | |
| "learning_rate": 4.6261595547309835e-05, | |
| "loss": 0.2741, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 5.140917683450764, | |
| "grad_norm": 0.5244805812835693, | |
| "learning_rate": 4.624710111317254e-05, | |
| "loss": 0.2913, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 5.158102766798419, | |
| "grad_norm": 0.7847909927368164, | |
| "learning_rate": 4.623260667903525e-05, | |
| "loss": 0.2908, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 5.158102766798419, | |
| "eval_loss": 0.3383817970752716, | |
| "eval_runtime": 159.5151, | |
| "eval_samples_per_second": 7.68, | |
| "eval_steps_per_second": 0.965, | |
| "eval_wer": 0.5484845273420941, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 5.1752878501460735, | |
| "grad_norm": 0.44492971897125244, | |
| "learning_rate": 4.621811224489796e-05, | |
| "loss": 0.3359, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 5.192472933493727, | |
| "grad_norm": 0.8884369730949402, | |
| "learning_rate": 4.620361781076067e-05, | |
| "loss": 0.286, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 5.209658016841382, | |
| "grad_norm": 0.4650115966796875, | |
| "learning_rate": 4.618912337662338e-05, | |
| "loss": 0.3538, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 5.226843100189036, | |
| "grad_norm": 0.46531254053115845, | |
| "learning_rate": 4.6174628942486086e-05, | |
| "loss": 0.2828, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 5.24402818353669, | |
| "grad_norm": 0.5248584747314453, | |
| "learning_rate": 4.6160134508348794e-05, | |
| "loss": 0.3166, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 5.2612132668843445, | |
| "grad_norm": 0.676996648311615, | |
| "learning_rate": 4.614564007421151e-05, | |
| "loss": 0.3141, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 5.278398350231998, | |
| "grad_norm": 0.40085482597351074, | |
| "learning_rate": 4.6131145640074215e-05, | |
| "loss": 0.3099, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 5.295583433579653, | |
| "grad_norm": 0.5248492956161499, | |
| "learning_rate": 4.611665120593692e-05, | |
| "loss": 0.2905, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 5.312768516927307, | |
| "grad_norm": 0.3710981607437134, | |
| "learning_rate": 4.610215677179963e-05, | |
| "loss": 0.3114, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 5.329953600274961, | |
| "grad_norm": 0.607997477054596, | |
| "learning_rate": 4.608766233766234e-05, | |
| "loss": 0.2814, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 5.347138683622616, | |
| "grad_norm": 0.41846323013305664, | |
| "learning_rate": 4.607316790352505e-05, | |
| "loss": 0.3442, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 5.36432376697027, | |
| "grad_norm": 0.7187564373016357, | |
| "learning_rate": 4.605867346938776e-05, | |
| "loss": 0.3089, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 5.381508850317924, | |
| "grad_norm": 0.6370894312858582, | |
| "learning_rate": 4.604417903525047e-05, | |
| "loss": 0.2802, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 5.398693933665578, | |
| "grad_norm": 0.4946443736553192, | |
| "learning_rate": 4.6029684601113174e-05, | |
| "loss": 0.2752, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 5.415879017013232, | |
| "grad_norm": 0.5713298916816711, | |
| "learning_rate": 4.601519016697589e-05, | |
| "loss": 0.3335, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 5.433064100360887, | |
| "grad_norm": 0.5185278058052063, | |
| "learning_rate": 4.6000695732838596e-05, | |
| "loss": 0.2779, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 5.450249183708541, | |
| "grad_norm": 0.6777128577232361, | |
| "learning_rate": 4.59862012987013e-05, | |
| "loss": 0.2907, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 5.467434267056195, | |
| "grad_norm": 0.4836239814758301, | |
| "learning_rate": 4.5971706864564004e-05, | |
| "loss": 0.2726, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 5.484619350403849, | |
| "grad_norm": 0.4827396273612976, | |
| "learning_rate": 4.595721243042672e-05, | |
| "loss": 0.2956, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 5.501804433751504, | |
| "grad_norm": 0.4124370813369751, | |
| "learning_rate": 4.5942717996289425e-05, | |
| "loss": 0.2646, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.501804433751504, | |
| "eval_loss": 0.31637853384017944, | |
| "eval_runtime": 154.147, | |
| "eval_samples_per_second": 7.947, | |
| "eval_steps_per_second": 0.999, | |
| "eval_wer": 0.5461530309453159, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 5.518989517099158, | |
| "grad_norm": 0.4147077202796936, | |
| "learning_rate": 4.592822356215213e-05, | |
| "loss": 0.3084, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 5.536174600446812, | |
| "grad_norm": 0.5999482274055481, | |
| "learning_rate": 4.591372912801484e-05, | |
| "loss": 0.2824, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 5.553359683794467, | |
| "grad_norm": 0.4082586169242859, | |
| "learning_rate": 4.5899234693877555e-05, | |
| "loss": 0.3095, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 5.57054476714212, | |
| "grad_norm": 0.45266950130462646, | |
| "learning_rate": 4.588474025974026e-05, | |
| "loss": 0.2672, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 5.587729850489775, | |
| "grad_norm": 0.4024001955986023, | |
| "learning_rate": 4.587024582560297e-05, | |
| "loss": 0.3147, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 5.60491493383743, | |
| "grad_norm": 0.61323481798172, | |
| "learning_rate": 4.585575139146568e-05, | |
| "loss": 0.2461, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 5.622100017185083, | |
| "grad_norm": 0.7370169758796692, | |
| "learning_rate": 4.584125695732839e-05, | |
| "loss": 0.3364, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 5.639285100532738, | |
| "grad_norm": 0.5105010867118835, | |
| "learning_rate": 4.58267625231911e-05, | |
| "loss": 0.2749, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 5.6564701838803915, | |
| "grad_norm": 0.4862951636314392, | |
| "learning_rate": 4.5812268089053806e-05, | |
| "loss": 0.318, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 5.673655267228046, | |
| "grad_norm": 0.5929895639419556, | |
| "learning_rate": 4.5797773654916513e-05, | |
| "loss": 0.3215, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 5.690840350575701, | |
| "grad_norm": 0.5355464220046997, | |
| "learning_rate": 4.578327922077922e-05, | |
| "loss": 0.343, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 5.708025433923354, | |
| "grad_norm": 0.6044451594352722, | |
| "learning_rate": 4.5768784786641935e-05, | |
| "loss": 0.2761, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 5.725210517271009, | |
| "grad_norm": 0.5010135769844055, | |
| "learning_rate": 4.575429035250464e-05, | |
| "loss": 0.3525, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 5.742395600618663, | |
| "grad_norm": 0.592808723449707, | |
| "learning_rate": 4.573979591836735e-05, | |
| "loss": 0.2749, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 5.759580683966317, | |
| "grad_norm": 0.5672963261604309, | |
| "learning_rate": 4.572530148423006e-05, | |
| "loss": 0.3711, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 5.776765767313972, | |
| "grad_norm": 0.8346742391586304, | |
| "learning_rate": 4.571080705009277e-05, | |
| "loss": 0.272, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 5.793950850661625, | |
| "grad_norm": 0.5474342107772827, | |
| "learning_rate": 4.569631261595548e-05, | |
| "loss": 0.3022, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 5.81113593400928, | |
| "grad_norm": 0.5067320466041565, | |
| "learning_rate": 4.5681818181818186e-05, | |
| "loss": 0.2632, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 5.828321017356934, | |
| "grad_norm": 0.4363684356212616, | |
| "learning_rate": 4.566732374768089e-05, | |
| "loss": 0.2961, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 5.845506100704588, | |
| "grad_norm": 0.6637271642684937, | |
| "learning_rate": 4.56528293135436e-05, | |
| "loss": 0.2982, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.845506100704588, | |
| "eval_loss": 0.3142754137516022, | |
| "eval_runtime": 153.2422, | |
| "eval_samples_per_second": 7.994, | |
| "eval_steps_per_second": 1.005, | |
| "eval_wer": 0.5454641797371768, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 5.862691184052243, | |
| "grad_norm": 0.5597206950187683, | |
| "learning_rate": 4.563833487940631e-05, | |
| "loss": 0.3282, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 5.879876267399897, | |
| "grad_norm": 0.5069429278373718, | |
| "learning_rate": 4.5623840445269016e-05, | |
| "loss": 0.2819, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 5.897061350747551, | |
| "grad_norm": 0.4272073805332184, | |
| "learning_rate": 4.5609346011131724e-05, | |
| "loss": 0.3032, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 5.9142464340952055, | |
| "grad_norm": 0.46952158212661743, | |
| "learning_rate": 4.559485157699444e-05, | |
| "loss": 0.2979, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 5.93143151744286, | |
| "grad_norm": 0.46362563967704773, | |
| "learning_rate": 4.5580357142857145e-05, | |
| "loss": 0.3042, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 5.948616600790514, | |
| "grad_norm": 0.6578242778778076, | |
| "learning_rate": 4.556586270871985e-05, | |
| "loss": 0.2558, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 5.965801684138168, | |
| "grad_norm": 0.5568517446517944, | |
| "learning_rate": 4.555136827458256e-05, | |
| "loss": 0.3127, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 5.982986767485822, | |
| "grad_norm": 0.5202658772468567, | |
| "learning_rate": 4.5536873840445274e-05, | |
| "loss": 0.2956, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 6.000687403333906, | |
| "grad_norm": 0.5851805210113525, | |
| "learning_rate": 4.552237940630798e-05, | |
| "loss": 0.3197, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 6.0178724866815605, | |
| "grad_norm": 0.4245930314064026, | |
| "learning_rate": 4.550788497217069e-05, | |
| "loss": 0.2537, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 6.035057570029215, | |
| "grad_norm": 0.3843390643596649, | |
| "learning_rate": 4.54933905380334e-05, | |
| "loss": 0.2552, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 6.052242653376869, | |
| "grad_norm": 0.4880934953689575, | |
| "learning_rate": 4.5478896103896104e-05, | |
| "loss": 0.2669, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 6.069427736724523, | |
| "grad_norm": 0.44671300053596497, | |
| "learning_rate": 4.546440166975882e-05, | |
| "loss": 0.2894, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 6.086612820072177, | |
| "grad_norm": 0.6307169795036316, | |
| "learning_rate": 4.5449907235621526e-05, | |
| "loss": 0.3032, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 6.1037979034198315, | |
| "grad_norm": 0.4117954969406128, | |
| "learning_rate": 4.543541280148423e-05, | |
| "loss": 0.2863, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 6.120982986767486, | |
| "grad_norm": 0.41599756479263306, | |
| "learning_rate": 4.542091836734694e-05, | |
| "loss": 0.2804, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 6.13816807011514, | |
| "grad_norm": 0.5033993124961853, | |
| "learning_rate": 4.5406423933209655e-05, | |
| "loss": 0.2971, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 6.155353153462794, | |
| "grad_norm": 0.39483895897865295, | |
| "learning_rate": 4.539192949907236e-05, | |
| "loss": 0.2485, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 6.172538236810449, | |
| "grad_norm": 0.48659563064575195, | |
| "learning_rate": 4.537743506493506e-05, | |
| "loss": 0.2847, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 6.189723320158103, | |
| "grad_norm": 0.7809969186782837, | |
| "learning_rate": 4.536294063079777e-05, | |
| "loss": 0.2978, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 6.189723320158103, | |
| "eval_loss": 0.3218280076980591, | |
| "eval_runtime": 151.9921, | |
| "eval_samples_per_second": 8.06, | |
| "eval_steps_per_second": 1.013, | |
| "eval_wer": 0.5423908435777872, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 6.206908403505757, | |
| "grad_norm": 0.6053128242492676, | |
| "learning_rate": 4.5348446196660485e-05, | |
| "loss": 0.275, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 6.224093486853411, | |
| "grad_norm": 0.4554101526737213, | |
| "learning_rate": 4.533395176252319e-05, | |
| "loss": 0.2926, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 6.241278570201065, | |
| "grad_norm": 0.4731072187423706, | |
| "learning_rate": 4.53194573283859e-05, | |
| "loss": 0.2921, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 6.25846365354872, | |
| "grad_norm": 0.4384573996067047, | |
| "learning_rate": 4.530496289424861e-05, | |
| "loss": 0.2827, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 6.275648736896374, | |
| "grad_norm": 0.5838501453399658, | |
| "learning_rate": 4.529046846011132e-05, | |
| "loss": 0.2924, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 6.292833820244028, | |
| "grad_norm": 0.5555030703544617, | |
| "learning_rate": 4.527597402597403e-05, | |
| "loss": 0.2839, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 6.310018903591683, | |
| "grad_norm": 0.7978671193122864, | |
| "learning_rate": 4.5261479591836736e-05, | |
| "loss": 0.2808, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 6.327203986939336, | |
| "grad_norm": 0.4151977002620697, | |
| "learning_rate": 4.5246985157699443e-05, | |
| "loss": 0.2767, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 6.344389070286991, | |
| "grad_norm": 0.5371110439300537, | |
| "learning_rate": 4.523249072356215e-05, | |
| "loss": 0.3124, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 6.3615741536346455, | |
| "grad_norm": 0.4500742554664612, | |
| "learning_rate": 4.5217996289424865e-05, | |
| "loss": 0.2731, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 6.378759236982299, | |
| "grad_norm": 0.5459848642349243, | |
| "learning_rate": 4.520350185528757e-05, | |
| "loss": 0.3118, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 6.395944320329954, | |
| "grad_norm": 0.7165120244026184, | |
| "learning_rate": 4.518900742115028e-05, | |
| "loss": 0.2781, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 6.413129403677607, | |
| "grad_norm": 0.4492073655128479, | |
| "learning_rate": 4.517451298701299e-05, | |
| "loss": 0.2652, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 6.430314487025262, | |
| "grad_norm": 0.8615822196006775, | |
| "learning_rate": 4.51600185528757e-05, | |
| "loss": 0.265, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 6.4474995703729165, | |
| "grad_norm": 0.37190139293670654, | |
| "learning_rate": 4.514552411873841e-05, | |
| "loss": 0.2839, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 6.46468465372057, | |
| "grad_norm": 0.5044118762016296, | |
| "learning_rate": 4.5131029684601116e-05, | |
| "loss": 0.2737, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 6.481869737068225, | |
| "grad_norm": 0.8805606961250305, | |
| "learning_rate": 4.5116535250463824e-05, | |
| "loss": 0.2658, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 6.499054820415879, | |
| "grad_norm": 0.52882981300354, | |
| "learning_rate": 4.510204081632654e-05, | |
| "loss": 0.2875, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 6.516239903763533, | |
| "grad_norm": 0.8483859896659851, | |
| "learning_rate": 4.5087546382189246e-05, | |
| "loss": 0.3182, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 6.533424987111188, | |
| "grad_norm": 0.5920891165733337, | |
| "learning_rate": 4.5073051948051946e-05, | |
| "loss": 0.288, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 6.533424987111188, | |
| "eval_loss": 0.3151616156101227, | |
| "eval_runtime": 152.9497, | |
| "eval_samples_per_second": 8.009, | |
| "eval_steps_per_second": 1.007, | |
| "eval_wer": 0.5417549809241204, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 6.550610070458841, | |
| "grad_norm": 0.5297147035598755, | |
| "learning_rate": 4.5059137291280155e-05, | |
| "loss": 0.2972, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 6.567795153806496, | |
| "grad_norm": 0.43849292397499084, | |
| "learning_rate": 4.504464285714286e-05, | |
| "loss": 0.2753, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 6.58498023715415, | |
| "grad_norm": 0.5231007933616638, | |
| "learning_rate": 4.503014842300557e-05, | |
| "loss": 0.2916, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 6.602165320501804, | |
| "grad_norm": 0.3840227425098419, | |
| "learning_rate": 4.501565398886828e-05, | |
| "loss": 0.2569, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 6.619350403849459, | |
| "grad_norm": 0.4950826168060303, | |
| "learning_rate": 4.5001159554730984e-05, | |
| "loss": 0.3198, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 6.636535487197113, | |
| "grad_norm": 0.4441792368888855, | |
| "learning_rate": 4.49866651205937e-05, | |
| "loss": 0.2796, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 6.653720570544767, | |
| "grad_norm": 0.4779716730117798, | |
| "learning_rate": 4.4972170686456406e-05, | |
| "loss": 0.3165, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 6.670905653892421, | |
| "grad_norm": 0.6962916254997253, | |
| "learning_rate": 4.4957676252319113e-05, | |
| "loss": 0.3176, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 6.688090737240076, | |
| "grad_norm": 0.43603190779685974, | |
| "learning_rate": 4.494318181818182e-05, | |
| "loss": 0.3095, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 6.70527582058773, | |
| "grad_norm": 0.8133454918861389, | |
| "learning_rate": 4.492868738404453e-05, | |
| "loss": 0.2793, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 6.722460903935384, | |
| "grad_norm": 0.4596538245677948, | |
| "learning_rate": 4.4914192949907236e-05, | |
| "loss": 0.2626, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 6.739645987283039, | |
| "grad_norm": 0.6948014497756958, | |
| "learning_rate": 4.489969851576994e-05, | |
| "loss": 0.2546, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 6.756831070630692, | |
| "grad_norm": 0.40873172879219055, | |
| "learning_rate": 4.488520408163265e-05, | |
| "loss": 0.2867, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 6.774016153978347, | |
| "grad_norm": 0.36870279908180237, | |
| "learning_rate": 4.4870709647495365e-05, | |
| "loss": 0.3258, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 6.791201237326001, | |
| "grad_norm": 0.4158894121646881, | |
| "learning_rate": 4.485621521335807e-05, | |
| "loss": 0.3015, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 6.808386320673655, | |
| "grad_norm": 0.4422719180583954, | |
| "learning_rate": 4.484172077922078e-05, | |
| "loss": 0.285, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 6.82557140402131, | |
| "grad_norm": 0.7379534840583801, | |
| "learning_rate": 4.482780612244898e-05, | |
| "loss": 0.3093, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 6.8427564873689635, | |
| "grad_norm": 0.5119884014129639, | |
| "learning_rate": 4.481331168831169e-05, | |
| "loss": 0.2794, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 6.859941570716618, | |
| "grad_norm": 0.46535733342170715, | |
| "learning_rate": 4.4798817254174396e-05, | |
| "loss": 0.2784, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 6.877126654064273, | |
| "grad_norm": 0.49434012174606323, | |
| "learning_rate": 4.4784322820037104e-05, | |
| "loss": 0.2706, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.877126654064273, | |
| "eval_loss": 0.321118026971817, | |
| "eval_runtime": 152.9572, | |
| "eval_samples_per_second": 8.009, | |
| "eval_steps_per_second": 1.007, | |
| "eval_wer": 0.53984739296312, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 6.894311737411926, | |
| "grad_norm": 0.4334565997123718, | |
| "learning_rate": 4.476982838589982e-05, | |
| "loss": 0.2762, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 6.911496820759581, | |
| "grad_norm": 0.564243733882904, | |
| "learning_rate": 4.4755333951762525e-05, | |
| "loss": 0.2429, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 6.9286819041072345, | |
| "grad_norm": 0.4657536447048187, | |
| "learning_rate": 4.474083951762523e-05, | |
| "loss": 0.2528, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 6.945866987454889, | |
| "grad_norm": 0.5497089624404907, | |
| "learning_rate": 4.472634508348794e-05, | |
| "loss": 0.2982, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 6.963052070802544, | |
| "grad_norm": 0.7017095685005188, | |
| "learning_rate": 4.471185064935065e-05, | |
| "loss": 0.2762, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 6.980237154150197, | |
| "grad_norm": 0.4462623596191406, | |
| "learning_rate": 4.469735621521336e-05, | |
| "loss": 0.2814, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 6.997422237497852, | |
| "grad_norm": 0.42071837186813354, | |
| "learning_rate": 4.468286178107607e-05, | |
| "loss": 0.2461, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 7.015122873345936, | |
| "grad_norm": 0.5645279288291931, | |
| "learning_rate": 4.466836734693878e-05, | |
| "loss": 0.2862, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 7.0323079566935895, | |
| "grad_norm": 1.0430643558502197, | |
| "learning_rate": 4.4653872912801484e-05, | |
| "loss": 0.2689, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 7.049493040041244, | |
| "grad_norm": 0.5359970331192017, | |
| "learning_rate": 4.46393784786642e-05, | |
| "loss": 0.2685, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 7.066678123388899, | |
| "grad_norm": 0.7322932481765747, | |
| "learning_rate": 4.4624884044526906e-05, | |
| "loss": 0.2806, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 7.083863206736552, | |
| "grad_norm": 0.4568728804588318, | |
| "learning_rate": 4.461038961038961e-05, | |
| "loss": 0.2726, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 7.101048290084207, | |
| "grad_norm": 1.341957926750183, | |
| "learning_rate": 4.459589517625232e-05, | |
| "loss": 0.2647, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 7.1182333734318615, | |
| "grad_norm": 0.48318567872047424, | |
| "learning_rate": 4.4581400742115035e-05, | |
| "loss": 0.28, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 7.135418456779515, | |
| "grad_norm": 0.7370210289955139, | |
| "learning_rate": 4.456690630797774e-05, | |
| "loss": 0.2886, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 7.15260354012717, | |
| "grad_norm": 0.703504741191864, | |
| "learning_rate": 4.455241187384045e-05, | |
| "loss": 0.276, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 7.169788623474824, | |
| "grad_norm": 0.5735402703285217, | |
| "learning_rate": 4.453791743970316e-05, | |
| "loss": 0.2786, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 7.186973706822478, | |
| "grad_norm": 0.36307334899902344, | |
| "learning_rate": 4.4523423005565865e-05, | |
| "loss": 0.2714, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 7.2041587901701325, | |
| "grad_norm": 0.8428148031234741, | |
| "learning_rate": 4.450892857142857e-05, | |
| "loss": 0.2997, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 7.221343873517786, | |
| "grad_norm": 0.8433165550231934, | |
| "learning_rate": 4.449443413729128e-05, | |
| "loss": 0.3008, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 7.221343873517786, | |
| "eval_loss": 0.3265960216522217, | |
| "eval_runtime": 151.3692, | |
| "eval_samples_per_second": 8.093, | |
| "eval_steps_per_second": 1.017, | |
| "eval_wer": 0.53984739296312, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 7.238528956865441, | |
| "grad_norm": 0.7506297826766968, | |
| "learning_rate": 4.447993970315399e-05, | |
| "loss": 0.2789, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 7.255714040213095, | |
| "grad_norm": 0.4726732075214386, | |
| "learning_rate": 4.44654452690167e-05, | |
| "loss": 0.2734, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 7.272899123560749, | |
| "grad_norm": 0.5341284275054932, | |
| "learning_rate": 4.445095083487941e-05, | |
| "loss": 0.2281, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 7.2900842069084035, | |
| "grad_norm": 1.0162677764892578, | |
| "learning_rate": 4.4436456400742116e-05, | |
| "loss": 0.2581, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 7.307269290256058, | |
| "grad_norm": 0.6543082594871521, | |
| "learning_rate": 4.4421961966604823e-05, | |
| "loss": 0.245, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 7.324454373603712, | |
| "grad_norm": 0.46228596568107605, | |
| "learning_rate": 4.440746753246753e-05, | |
| "loss": 0.2835, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 7.341639456951366, | |
| "grad_norm": 1.2381787300109863, | |
| "learning_rate": 4.4392973098330245e-05, | |
| "loss": 0.241, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 7.35882454029902, | |
| "grad_norm": 0.6333453059196472, | |
| "learning_rate": 4.437847866419295e-05, | |
| "loss": 0.2691, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 7.376009623646675, | |
| "grad_norm": 0.5812168121337891, | |
| "learning_rate": 4.436398423005566e-05, | |
| "loss": 0.2679, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 7.393194706994329, | |
| "grad_norm": 0.4156525433063507, | |
| "learning_rate": 4.434948979591837e-05, | |
| "loss": 0.2355, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 7.410379790341983, | |
| "grad_norm": 0.7613847255706787, | |
| "learning_rate": 4.433499536178108e-05, | |
| "loss": 0.2578, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 7.427564873689637, | |
| "grad_norm": 0.4807249903678894, | |
| "learning_rate": 4.432050092764379e-05, | |
| "loss": 0.2873, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 7.444749957037292, | |
| "grad_norm": 0.6777732372283936, | |
| "learning_rate": 4.4306006493506496e-05, | |
| "loss": 0.2319, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 7.461935040384946, | |
| "grad_norm": 0.5690301060676575, | |
| "learning_rate": 4.4291512059369204e-05, | |
| "loss": 0.2933, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 7.4791201237326, | |
| "grad_norm": 0.6938736438751221, | |
| "learning_rate": 4.427701762523191e-05, | |
| "loss": 0.25, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 7.496305207080255, | |
| "grad_norm": 1.2933138608932495, | |
| "learning_rate": 4.4262523191094626e-05, | |
| "loss": 0.2932, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 7.513490290427908, | |
| "grad_norm": 0.6283312439918518, | |
| "learning_rate": 4.424802875695733e-05, | |
| "loss": 0.2703, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 7.530675373775563, | |
| "grad_norm": 0.6064092516899109, | |
| "learning_rate": 4.423353432282004e-05, | |
| "loss": 0.2762, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 7.5478604571232175, | |
| "grad_norm": 0.6407192349433899, | |
| "learning_rate": 4.421903988868275e-05, | |
| "loss": 0.2477, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 7.565045540470871, | |
| "grad_norm": 0.40185797214508057, | |
| "learning_rate": 4.4204545454545455e-05, | |
| "loss": 0.2674, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 7.565045540470871, | |
| "eval_loss": 0.318492591381073, | |
| "eval_runtime": 150.7169, | |
| "eval_samples_per_second": 8.128, | |
| "eval_steps_per_second": 1.022, | |
| "eval_wer": 0.5379398050021196, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 7.565045540470871, | |
| "step": 11000, | |
| "total_flos": 8.071230701734987e+19, | |
| "train_loss": 0.7137202631343494, | |
| "train_runtime": 97593.296, | |
| "train_samples_per_second": 28.619, | |
| "train_steps_per_second": 0.894 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 87240, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 60, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.071230701734987e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
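
For reference, below is a minimal sketch of the `transformers` Trainer setup implied by the scheduling fields in this state file: logging every 25 steps, evaluation and checkpointing every 500 steps, a per-device batch size of 8, up to 60 epochs (87,240 steps), and early stopping with patience 5 and threshold 0.0. The `output_dir`, the choice of `eval_loss` as the checkpoint-selection metric, and the variable names are illustrative assumptions not recorded in the file; a recent `transformers` release (one that writes `stateful_callbacks`) is assumed.

```python
from transformers import TrainingArguments, EarlyStoppingCallback

# Schedule values below are copied from this state file; output_dir and the
# selection metric are illustrative assumptions.
training_args = TrainingArguments(
    output_dir="wav2vec2-finetune",     # hypothetical path, not from the state file
    per_device_train_batch_size=8,      # "train_batch_size": 8
    num_train_epochs=60,                # "num_train_epochs": 60
    logging_steps=25,                   # "logging_steps": 25
    eval_strategy="steps",              # evaluations appear every 500 steps in log_history
    eval_steps=500,
    save_steps=500,                     # "save_steps": 500
    load_best_model_at_end=True,        # required for metric-based early stopping
    metric_for_best_model="eval_loss",  # assumption: checkpoints ranked by eval_loss
    greater_is_better=False,
)

# Mirrors the EarlyStoppingCallback entry under "stateful_callbacks": once 5
# consecutive evaluations fail to improve the tracked metric by more than 0.0,
# training stops -- consistent with this run halting at step 11000, five
# evaluations after the lowest eval_loss recorded at step 8500.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=5,
    early_stopping_threshold=0.0,
)

# Both objects would then be passed to transformers.Trainer(args=training_args,
# callbacks=[early_stopping], ...) together with the model, processor, and
# datasets, none of which are described by this state file.
```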