| { |
| "best_global_step": 35000, |
| "best_metric": 30.37997340697555, |
| "best_model_checkpoint": "phase5_output/checkpoints/stage1/checkpoint-35000", |
| "epoch": 0.6196938712276135, |
| "eval_steps": 5000, |
| "global_step": 35000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008852769588965908, |
| "grad_norm": 20.375, |
| "learning_rate": 4.900000000000001e-07, |
| "loss": 2.6002566528320314, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0017705539177931815, |
| "grad_norm": 16.5, |
| "learning_rate": 9.9e-07, |
| "loss": 2.6074697875976565, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0026558308766897725, |
| "grad_norm": 12.8125, |
| "learning_rate": 1.4900000000000001e-06, |
| "loss": 2.661256103515625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.003541107835586363, |
| "grad_norm": 16.125, |
| "learning_rate": 1.9900000000000004e-06, |
| "loss": 2.3539471435546875, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004426384794482954, |
| "grad_norm": 13.125, |
| "learning_rate": 2.4900000000000003e-06, |
| "loss": 2.1791415405273438, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.005311661753379545, |
| "grad_norm": 14.3125, |
| "learning_rate": 2.99e-06, |
| "loss": 1.7517926025390624, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.006196938712276136, |
| "grad_norm": 8.9375, |
| "learning_rate": 3.49e-06, |
| "loss": 1.5738864135742188, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.007082215671172726, |
| "grad_norm": 10.375, |
| "learning_rate": 3.990000000000001e-06, |
| "loss": 1.4318115234375, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.007967492630069318, |
| "grad_norm": 12.0, |
| "learning_rate": 4.49e-06, |
| "loss": 1.3452346801757813, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.008852769588965907, |
| "grad_norm": 11.5, |
| "learning_rate": 4.9900000000000005e-06, |
| "loss": 1.3893937683105468, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009738046547862499, |
| "grad_norm": 20.125, |
| "learning_rate": 5.490000000000001e-06, |
| "loss": 1.3567886352539062, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.01062332350675909, |
| "grad_norm": 9.5625, |
| "learning_rate": 5.99e-06, |
| "loss": 1.3436286926269532, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.01150860046565568, |
| "grad_norm": 9.875, |
| "learning_rate": 6.4900000000000005e-06, |
| "loss": 1.2315786743164063, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.012393877424552271, |
| "grad_norm": 8.75, |
| "learning_rate": 6.99e-06, |
| "loss": 1.2174966430664063, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.013279154383448863, |
| "grad_norm": 9.375, |
| "learning_rate": 7.49e-06, |
| "loss": 1.2520484161376952, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.014164431342345452, |
| "grad_norm": 8.4375, |
| "learning_rate": 7.990000000000001e-06, |
| "loss": 1.2200564575195312, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.015049708301242044, |
| "grad_norm": 11.1875, |
| "learning_rate": 8.49e-06, |
| "loss": 1.1677375793457032, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.015934985260138635, |
| "grad_norm": 8.9375, |
| "learning_rate": 8.99e-06, |
| "loss": 1.1773777770996094, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.016820262219035226, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.49e-06, |
| "loss": 1.1839574432373048, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.017705539177931814, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.990000000000001e-06, |
| "loss": 1.1368023681640624, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.018590816136828406, |
| "grad_norm": 9.625, |
| "learning_rate": 9.998757480474695e-06, |
| "loss": 1.1666729736328125, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.019476093095724997, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.997489603408056e-06, |
| "loss": 1.1337457275390626, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.02036137005462159, |
| "grad_norm": 8.375, |
| "learning_rate": 9.996221726341415e-06, |
| "loss": 1.1557207489013672, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.02124664701351818, |
| "grad_norm": 8.375, |
| "learning_rate": 9.994953849274776e-06, |
| "loss": 1.1596089935302734, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.02213192397241477, |
| "grad_norm": 10.6875, |
| "learning_rate": 9.993685972208136e-06, |
| "loss": 1.115845718383789, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.02301720093131136, |
| "grad_norm": 11.5, |
| "learning_rate": 9.992418095141496e-06, |
| "loss": 1.0784302520751954, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.02390247789020795, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.991150218074856e-06, |
| "loss": 1.0735511779785156, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.024787754849104542, |
| "grad_norm": 8.625, |
| "learning_rate": 9.989882341008217e-06, |
| "loss": 1.0815206146240235, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.025673031808001134, |
| "grad_norm": 8.5, |
| "learning_rate": 9.988614463941578e-06, |
| "loss": 1.0743460845947266, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.026558308766897725, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.987346586874937e-06, |
| "loss": 1.1286388397216798, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.027443585725794313, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.986078709808298e-06, |
| "loss": 1.0764491271972656, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.028328862684690904, |
| "grad_norm": 7.4375, |
| "learning_rate": 9.984810832741659e-06, |
| "loss": 1.0406829071044923, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.029214139643587496, |
| "grad_norm": 7.375, |
| "learning_rate": 9.983542955675018e-06, |
| "loss": 1.075464096069336, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.030099416602484087, |
| "grad_norm": 10.875, |
| "learning_rate": 9.982275078608379e-06, |
| "loss": 1.0150408935546875, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.03098469356138068, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.98100720154174e-06, |
| "loss": 1.0287052154541017, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.03186997052027727, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.9797393244751e-06, |
| "loss": 1.031718978881836, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.03275524747917386, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.97847144740846e-06, |
| "loss": 1.0560354614257812, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.03364052443807045, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.97720357034182e-06, |
| "loss": 0.9881591033935547, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.034525801396967044, |
| "grad_norm": 6.75, |
| "learning_rate": 9.975935693275181e-06, |
| "loss": 0.9941422271728516, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.03541107835586363, |
| "grad_norm": 7.78125, |
| "learning_rate": 9.97466781620854e-06, |
| "loss": 0.9812654876708984, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03629635531476022, |
| "grad_norm": 7.40625, |
| "learning_rate": 9.973399939141901e-06, |
| "loss": 1.0280473327636719, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.03718163227365681, |
| "grad_norm": 6.53125, |
| "learning_rate": 9.972132062075262e-06, |
| "loss": 1.0059999084472657, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0380669092325534, |
| "grad_norm": 9.0, |
| "learning_rate": 9.970864185008622e-06, |
| "loss": 1.0437776947021484, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.038952186191449995, |
| "grad_norm": 8.625, |
| "learning_rate": 9.969596307941983e-06, |
| "loss": 0.9704521942138672, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.039837463150346586, |
| "grad_norm": 8.75, |
| "learning_rate": 9.968328430875344e-06, |
| "loss": 1.0186034393310548, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.04072274010924318, |
| "grad_norm": 6.84375, |
| "learning_rate": 9.967060553808703e-06, |
| "loss": 0.9506314086914063, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.04160801706813977, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.965792676742064e-06, |
| "loss": 1.0404967498779296, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.04249329402703636, |
| "grad_norm": 7.375, |
| "learning_rate": 9.964524799675425e-06, |
| "loss": 0.9251933288574219, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.04337857098593295, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.963256922608786e-06, |
| "loss": 1.0444112396240235, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.04426384794482954, |
| "grad_norm": 10.5, |
| "learning_rate": 9.961989045542145e-06, |
| "loss": 0.9814193725585938, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04514912490372613, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.960721168475506e-06, |
| "loss": 0.9973986053466797, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.04603440186262272, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.959453291408866e-06, |
| "loss": 1.0070331573486329, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.04691967882151931, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.958185414342225e-06, |
| "loss": 0.9542020416259765, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.0478049557804159, |
| "grad_norm": 10.25, |
| "learning_rate": 9.956917537275586e-06, |
| "loss": 0.9571893310546875, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.04869023273931249, |
| "grad_norm": 9.375, |
| "learning_rate": 9.955649660208947e-06, |
| "loss": 0.9763975524902344, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.049575509698209085, |
| "grad_norm": 9.625, |
| "learning_rate": 9.954381783142308e-06, |
| "loss": 0.9599230194091797, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.050460786657105676, |
| "grad_norm": 11.25, |
| "learning_rate": 9.953113906075667e-06, |
| "loss": 1.0347834014892578, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.05134606361600227, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.951846029009028e-06, |
| "loss": 0.9465586853027343, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.05223134057489886, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.950578151942389e-06, |
| "loss": 0.9527477264404297, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.05311661753379545, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.94931027487575e-06, |
| "loss": 0.9980839538574219, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.05400189449269204, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.948042397809109e-06, |
| "loss": 0.9938941192626953, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.054887171451588626, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.94677452074247e-06, |
| "loss": 0.9848545074462891, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.05577244841048522, |
| "grad_norm": 6.875, |
| "learning_rate": 9.94550664367583e-06, |
| "loss": 0.9058123779296875, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.05665772536938181, |
| "grad_norm": 10.0, |
| "learning_rate": 9.94423876660919e-06, |
| "loss": 0.9505638122558594, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.0575430023282784, |
| "grad_norm": 5.96875, |
| "learning_rate": 9.94297088954255e-06, |
| "loss": 0.9797083282470703, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.05842827928717499, |
| "grad_norm": 7.78125, |
| "learning_rate": 9.941703012475911e-06, |
| "loss": 0.9330976867675781, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.05931355624607158, |
| "grad_norm": 10.875, |
| "learning_rate": 9.940435135409272e-06, |
| "loss": 0.9697081756591797, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.060198833204968175, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.939167258342633e-06, |
| "loss": 0.9778965759277344, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.061084110163864766, |
| "grad_norm": 6.46875, |
| "learning_rate": 9.937899381275993e-06, |
| "loss": 0.9776050567626953, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.06196938712276136, |
| "grad_norm": 8.75, |
| "learning_rate": 9.936631504209352e-06, |
| "loss": 0.9795106506347656, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.06285466408165795, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.935363627142713e-06, |
| "loss": 0.9063382720947266, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.06373994104055454, |
| "grad_norm": 8.125, |
| "learning_rate": 9.934095750076074e-06, |
| "loss": 0.9287123107910156, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.06462521799945113, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.932827873009435e-06, |
| "loss": 0.9231369018554687, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.06551049495834772, |
| "grad_norm": 7.5, |
| "learning_rate": 9.931559995942794e-06, |
| "loss": 0.9772643280029297, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.06639577191724431, |
| "grad_norm": 9.75, |
| "learning_rate": 9.930292118876155e-06, |
| "loss": 0.9633369445800781, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.0672810488761409, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.929024241809516e-06, |
| "loss": 0.9590021514892578, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.0681663258350375, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.927756364742875e-06, |
| "loss": 0.9859857177734375, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.06905160279393409, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.926488487676236e-06, |
| "loss": 0.9285535430908203, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06993687975283067, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.925220610609596e-06, |
| "loss": 0.9168830871582031, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.07082215671172726, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.923952733542957e-06, |
| "loss": 0.9035071563720704, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.07170743367062385, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.922684856476316e-06, |
| "loss": 0.9321700286865234, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.07259271062952044, |
| "grad_norm": 8.875, |
| "learning_rate": 9.921416979409677e-06, |
| "loss": 0.9381676483154296, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.07347798758841703, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.920149102343038e-06, |
| "loss": 0.9207463073730469, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.07436326454731362, |
| "grad_norm": 9.5, |
| "learning_rate": 9.918881225276397e-06, |
| "loss": 0.9887482452392579, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.07524854150621021, |
| "grad_norm": 7.125, |
| "learning_rate": 9.917613348209758e-06, |
| "loss": 0.8934328460693359, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.0761338184651068, |
| "grad_norm": 7.25, |
| "learning_rate": 9.916345471143119e-06, |
| "loss": 0.9096377563476562, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.0770190954240034, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.91507759407648e-06, |
| "loss": 0.914523696899414, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.07790437238289999, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.913809717009839e-06, |
| "loss": 0.9668045806884765, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.07878964934179658, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.9125418399432e-06, |
| "loss": 0.8644290924072265, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.07967492630069317, |
| "grad_norm": 6.65625, |
| "learning_rate": 9.91127396287656e-06, |
| "loss": 0.9925772094726563, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.08056020325958976, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.910006085809921e-06, |
| "loss": 0.9446270751953125, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.08144548021848635, |
| "grad_norm": 9.25, |
| "learning_rate": 9.908738208743282e-06, |
| "loss": 0.8774137115478515, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.08233075717738295, |
| "grad_norm": 9.0, |
| "learning_rate": 9.907470331676643e-06, |
| "loss": 0.9241300201416016, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.08321603413627954, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.906202454610002e-06, |
| "loss": 0.9137750244140626, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.08410131109517613, |
| "grad_norm": 8.5, |
| "learning_rate": 9.904934577543363e-06, |
| "loss": 0.9445246887207032, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.08498658805407272, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.903666700476723e-06, |
| "loss": 0.9241275024414063, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.08587186501296931, |
| "grad_norm": 11.4375, |
| "learning_rate": 9.902398823410082e-06, |
| "loss": 0.9301995849609375, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.0867571419718659, |
| "grad_norm": 9.375, |
| "learning_rate": 9.901130946343443e-06, |
| "loss": 0.8602587127685547, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.0876424189307625, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.899863069276804e-06, |
| "loss": 0.8998529052734375, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.08852769588965909, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.898595192210165e-06, |
| "loss": 0.9169329833984375, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.08852769588965909, |
| "eval_cer": 18.002960906275927, |
| "eval_loss": 0.39146754145622253, |
| "eval_runtime": 397.3897, |
| "eval_samples_per_second": 12.582, |
| "eval_steps_per_second": 1.573, |
| "eval_wer": 35.30223995090518, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.08941297284855568, |
| "grad_norm": 16.625, |
| "learning_rate": 9.897327315143524e-06, |
| "loss": 0.8970352172851562, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.09029824980745225, |
| "grad_norm": 7.3125, |
| "learning_rate": 9.896059438076885e-06, |
| "loss": 0.9074311065673828, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.09118352676634885, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.894791561010246e-06, |
| "loss": 0.9284700775146484, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.09206880372524544, |
| "grad_norm": 7.15625, |
| "learning_rate": 9.893523683943605e-06, |
| "loss": 0.9229075622558593, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.09295408068414203, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.892255806876966e-06, |
| "loss": 0.9389077758789063, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.09383935764303862, |
| "grad_norm": 7.46875, |
| "learning_rate": 9.890987929810326e-06, |
| "loss": 0.9642659759521485, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.09472463460193521, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.889720052743687e-06, |
| "loss": 0.9125606536865234, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.0956099115608318, |
| "grad_norm": 7.3125, |
| "learning_rate": 9.888452175677046e-06, |
| "loss": 0.8953401947021484, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.0964951885197284, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.887184298610407e-06, |
| "loss": 0.8888931274414062, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.09738046547862499, |
| "grad_norm": 8.875, |
| "learning_rate": 9.885916421543768e-06, |
| "loss": 0.9167032623291016, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.09826574243752158, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.884648544477129e-06, |
| "loss": 0.9441605377197265, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.09915101939641817, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.883380667410488e-06, |
| "loss": 0.9138188171386719, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.10003629635531476, |
| "grad_norm": 10.125, |
| "learning_rate": 9.882112790343849e-06, |
| "loss": 0.8812205505371093, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.10092157331421135, |
| "grad_norm": 7.1875, |
| "learning_rate": 9.88084491327721e-06, |
| "loss": 0.875129623413086, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.10180685027310794, |
| "grad_norm": 7.40625, |
| "learning_rate": 9.87957703621057e-06, |
| "loss": 0.8896215057373047, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.10269212723200453, |
| "grad_norm": 10.75, |
| "learning_rate": 9.878309159143931e-06, |
| "loss": 0.870993881225586, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.10357740419090113, |
| "grad_norm": 5.8125, |
| "learning_rate": 9.87704128207729e-06, |
| "loss": 0.8822001647949219, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.10446268114979772, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.875773405010651e-06, |
| "loss": 0.8593311309814453, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.10534795810869431, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.874505527944012e-06, |
| "loss": 0.8941629028320313, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.1062332350675909, |
| "grad_norm": 8.75, |
| "learning_rate": 9.873237650877373e-06, |
| "loss": 0.9121043395996093, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.10711851202648749, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.871969773810732e-06, |
| "loss": 0.8677694702148437, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.10800378898538408, |
| "grad_norm": 7.375, |
| "learning_rate": 9.870701896744092e-06, |
| "loss": 0.8670549011230468, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.10888906594428067, |
| "grad_norm": 12.875, |
| "learning_rate": 9.869434019677453e-06, |
| "loss": 0.857596435546875, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.10977434290317725, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.868166142610814e-06, |
| "loss": 0.8889055633544922, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.11065961986207384, |
| "grad_norm": 9.375, |
| "learning_rate": 9.866898265544173e-06, |
| "loss": 0.8906202697753907, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.11154489682097044, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.865630388477534e-06, |
| "loss": 0.8508009338378906, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.11243017377986703, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.864362511410895e-06, |
| "loss": 0.8770820617675781, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.11331545073876362, |
| "grad_norm": 7.65625, |
| "learning_rate": 9.863094634344254e-06, |
| "loss": 0.9325962829589843, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.11420072769766021, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.861826757277615e-06, |
| "loss": 0.913088150024414, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.1150860046565568, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.860558880210976e-06, |
| "loss": 0.8288323211669922, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.11597128161545339, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.859291003144336e-06, |
| "loss": 0.8909578704833985, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.11685655857434998, |
| "grad_norm": 6.84375, |
| "learning_rate": 9.858023126077696e-06, |
| "loss": 0.8909761810302734, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.11774183553324657, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.856755249011056e-06, |
| "loss": 0.913955078125, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.11862711249214317, |
| "grad_norm": 7.375, |
| "learning_rate": 9.855487371944417e-06, |
| "loss": 0.890057601928711, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.11951238945103976, |
| "grad_norm": 7.09375, |
| "learning_rate": 9.854219494877776e-06, |
| "loss": 0.8805287170410157, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.12039766640993635, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.852951617811137e-06, |
| "loss": 0.8432673645019532, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.12128294336883294, |
| "grad_norm": 8.25, |
| "learning_rate": 9.8516837407445e-06, |
| "loss": 0.936988525390625, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.12216822032772953, |
| "grad_norm": 8.5, |
| "learning_rate": 9.850415863677859e-06, |
| "loss": 0.8533177185058594, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.12305349728662612, |
| "grad_norm": 7.5, |
| "learning_rate": 9.84914798661122e-06, |
| "loss": 0.8586708068847656, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.12393877424552271, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.84788010954458e-06, |
| "loss": 0.8778302001953125, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.1248240512044193, |
| "grad_norm": 9.25, |
| "learning_rate": 9.84661223247794e-06, |
| "loss": 0.9390164947509766, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.1257093281633159, |
| "grad_norm": 7.875, |
| "learning_rate": 9.8453443554113e-06, |
| "loss": 0.8395907592773437, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.1265946051222125, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.844076478344661e-06, |
| "loss": 0.8765547180175781, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.12747988208110908, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.842808601278022e-06, |
| "loss": 0.9166593170166015, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.12836515904000567, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.841540724211381e-06, |
| "loss": 0.8632067108154297, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.12925043599890226, |
| "grad_norm": 10.0, |
| "learning_rate": 9.840272847144742e-06, |
| "loss": 0.8881147003173828, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.13013571295779885, |
| "grad_norm": 9.125, |
| "learning_rate": 9.839004970078103e-06, |
| "loss": 0.8959363555908203, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.13102098991669545, |
| "grad_norm": 5.46875, |
| "learning_rate": 9.837737093011462e-06, |
| "loss": 0.8682516479492187, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.13190626687559204, |
| "grad_norm": 8.875, |
| "learning_rate": 9.836469215944822e-06, |
| "loss": 0.8162654113769531, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.13279154383448863, |
| "grad_norm": 8.125, |
| "learning_rate": 9.835201338878183e-06, |
| "loss": 0.9132343292236328, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.13367682079338522, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.833933461811544e-06, |
| "loss": 0.9420564270019531, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.1345620977522818, |
| "grad_norm": 8.125, |
| "learning_rate": 9.832665584744903e-06, |
| "loss": 0.9325301361083984, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.1354473747111784, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.831397707678264e-06, |
| "loss": 0.9296858978271484, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.136332651670075, |
| "grad_norm": 8.25, |
| "learning_rate": 9.830129830611625e-06, |
| "loss": 0.9119468688964844, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.13721792862897159, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.828861953544984e-06, |
| "loss": 0.8511313629150391, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.13810320558786818, |
| "grad_norm": 9.375, |
| "learning_rate": 9.827594076478345e-06, |
| "loss": 0.8683940124511719, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.13898848254676477, |
| "grad_norm": 8.75, |
| "learning_rate": 9.826326199411706e-06, |
| "loss": 0.8960696411132812, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.13987375950566133, |
| "grad_norm": 7.875, |
| "learning_rate": 9.825058322345066e-06, |
| "loss": 0.9292098999023437, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.14075903646455792, |
| "grad_norm": 8.375, |
| "learning_rate": 9.823790445278425e-06, |
| "loss": 0.8068239593505859, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.14164431342345452, |
| "grad_norm": 7.59375, |
| "learning_rate": 9.822522568211788e-06, |
| "loss": 0.8778212738037109, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.1425295903823511, |
| "grad_norm": 8.25, |
| "learning_rate": 9.821254691145147e-06, |
| "loss": 0.8837771606445313, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.1434148673412477, |
| "grad_norm": 5.875, |
| "learning_rate": 9.819986814078508e-06, |
| "loss": 0.9285024261474609, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.1443001443001443, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.818718937011869e-06, |
| "loss": 0.9287461853027343, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.14518542125904088, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.81745105994523e-06, |
| "loss": 0.8639019775390625, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.14607069821793747, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.816183182878589e-06, |
| "loss": 0.8503567504882813, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.14695597517683406, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.81491530581195e-06, |
| "loss": 0.8940105438232422, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.14784125213573066, |
| "grad_norm": 9.375, |
| "learning_rate": 9.81364742874531e-06, |
| "loss": 0.8853314208984375, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.14872652909462725, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.81237955167867e-06, |
| "loss": 0.9398179626464844, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.14961180605352384, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.81111167461203e-06, |
| "loss": 0.9009015655517578, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.15049708301242043, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.809843797545391e-06, |
| "loss": 0.8552869415283203, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.15138235997131702, |
| "grad_norm": 8.75, |
| "learning_rate": 9.808575920478752e-06, |
| "loss": 0.8683760070800781, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.1522676369302136, |
| "grad_norm": 8.875, |
| "learning_rate": 9.807308043412111e-06, |
| "loss": 0.8234300994873047, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.1531529138891102, |
| "grad_norm": 8.875, |
| "learning_rate": 9.806040166345472e-06, |
| "loss": 0.7992705535888672, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.1540381908480068, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.804772289278833e-06, |
| "loss": 0.8522439575195313, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.1549234678069034, |
| "grad_norm": 10.25, |
| "learning_rate": 9.803504412212193e-06, |
| "loss": 0.8569031524658203, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.15580874476579998, |
| "grad_norm": 8.25, |
| "learning_rate": 9.802236535145552e-06, |
| "loss": 0.9121205139160157, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.15669402172469657, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.800968658078913e-06, |
| "loss": 0.8695069122314453, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.15757929868359316, |
| "grad_norm": 7.875, |
| "learning_rate": 9.799700781012274e-06, |
| "loss": 0.8624824523925781, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.15846457564248975, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.798432903945633e-06, |
| "loss": 0.8402172088623047, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.15934985260138634, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.797165026878994e-06, |
| "loss": 0.8360052490234375, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.16023512956028294, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.795897149812355e-06, |
| "loss": 0.9017723083496094, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.16112040651917953, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.794629272745716e-06, |
| "loss": 0.8305178833007812, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.16200568347807612, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.793361395679076e-06, |
| "loss": 0.8787906646728516, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.1628909604369727, |
| "grad_norm": 8.625, |
| "learning_rate": 9.792093518612437e-06, |
| "loss": 0.9024432373046875, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.1637762373958693, |
| "grad_norm": 10.125, |
| "learning_rate": 9.790825641545796e-06, |
| "loss": 0.8455127716064453, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.1646615143547659, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.789557764479157e-06, |
| "loss": 0.8886133575439453, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.16554679131366248, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.788289887412518e-06, |
| "loss": 0.9232273101806641, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.16643206827255907, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.787022010345879e-06, |
| "loss": 0.7960693359375, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.16731734523145567, |
| "grad_norm": 9.875, |
| "learning_rate": 9.785754133279238e-06, |
| "loss": 0.9040877532958984, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.16820262219035226, |
| "grad_norm": 8.375, |
| "learning_rate": 9.784486256212599e-06, |
| "loss": 0.8423170471191406, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.16908789914924885, |
| "grad_norm": 10.75, |
| "learning_rate": 9.78321837914596e-06, |
| "loss": 0.7995271301269531, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.16997317610814544, |
| "grad_norm": 6.78125, |
| "learning_rate": 9.781950502079319e-06, |
| "loss": 0.8801698303222656, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.17085845306704203, |
| "grad_norm": 6.84375, |
| "learning_rate": 9.78068262501268e-06, |
| "loss": 0.902987060546875, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.17174373002593862, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.77941474794604e-06, |
| "loss": 0.9009125518798828, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.17262900698483521, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.778146870879401e-06, |
| "loss": 0.8579206085205078, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.1735142839437318, |
| "grad_norm": 7.4375, |
| "learning_rate": 9.77687899381276e-06, |
| "loss": 0.8464696502685547, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.1743995609026284, |
| "grad_norm": 11.625, |
| "learning_rate": 9.775611116746121e-06, |
| "loss": 0.85698974609375, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.175284837861525, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.774343239679482e-06, |
| "loss": 0.8342364501953125, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.17617011482042158, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.773075362612841e-06, |
| "loss": 0.8839446258544922, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.17705539177931817, |
| "grad_norm": 8.75, |
| "learning_rate": 9.771807485546202e-06, |
| "loss": 0.8295965576171875, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.17705539177931817, |
| "eval_cer": 16.53660732200669, |
| "eval_loss": 0.36992114782333374, |
| "eval_runtime": 390.2086, |
| "eval_samples_per_second": 12.814, |
| "eval_steps_per_second": 1.602, |
| "eval_wer": 33.00347754935052, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.17794066873821476, |
| "grad_norm": 10.0, |
| "learning_rate": 9.770539608479563e-06, |
| "loss": 0.9520655059814453, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.17882594569711135, |
| "grad_norm": 7.5, |
| "learning_rate": 9.769271731412923e-06, |
| "loss": 0.9026583099365234, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.17971122265600792, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.768003854346282e-06, |
| "loss": 0.8356916046142578, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.1805964996149045, |
| "grad_norm": 10.125, |
| "learning_rate": 9.766735977279643e-06, |
| "loss": 0.8295375823974609, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.1814817765738011, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.765468100213004e-06, |
| "loss": 0.8467240905761719, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.1823670535326977, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.764200223146365e-06, |
| "loss": 0.8381356811523437, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.18325233049159428, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.762932346079726e-06, |
| "loss": 0.8656709289550781, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.18413760745049088, |
| "grad_norm": 8.875, |
| "learning_rate": 9.761664469013086e-06, |
| "loss": 0.874046401977539, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.18502288440938747, |
| "grad_norm": 7.78125, |
| "learning_rate": 9.760396591946446e-06, |
| "loss": 0.8614305877685546, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.18590816136828406, |
| "grad_norm": 9.125, |
| "learning_rate": 9.759128714879806e-06, |
| "loss": 0.8775393676757812, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.18679343832718065, |
| "grad_norm": 7.625, |
| "learning_rate": 9.757860837813167e-06, |
| "loss": 0.8610476684570313, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.18767871528607724, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.756592960746526e-06, |
| "loss": 0.9277496337890625, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.18856399224497383, |
| "grad_norm": 8.5, |
| "learning_rate": 9.755325083679887e-06, |
| "loss": 0.8972523498535157, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.18944926920387042, |
| "grad_norm": 8.0, |
| "learning_rate": 9.754057206613248e-06, |
| "loss": 0.854305648803711, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.19033454616276702, |
| "grad_norm": 5.65625, |
| "learning_rate": 9.752789329546609e-06, |
| "loss": 0.8421508026123047, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.1912198231216636, |
| "grad_norm": 6.625, |
| "learning_rate": 9.751521452479968e-06, |
| "loss": 0.8855830383300781, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.1921051000805602, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.750253575413329e-06, |
| "loss": 0.8550155639648438, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.1929903770394568, |
| "grad_norm": 8.5, |
| "learning_rate": 9.74898569834669e-06, |
| "loss": 0.8865677642822266, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.19387565399835338, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.747717821280049e-06, |
| "loss": 0.8427695465087891, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.19476093095724997, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.74644994421341e-06, |
| "loss": 0.8303961181640624, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.19564620791614656, |
| "grad_norm": 9.25, |
| "learning_rate": 9.74518206714677e-06, |
| "loss": 0.8993209838867188, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.19653148487504316, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.743914190080131e-06, |
| "loss": 0.8488899993896485, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.19741676183393975, |
| "grad_norm": 6.03125, |
| "learning_rate": 9.74264631301349e-06, |
| "loss": 0.852935791015625, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.19830203879283634, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.741378435946851e-06, |
| "loss": 0.8230840301513672, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.19918731575173293, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.740110558880212e-06, |
| "loss": 0.8860896301269531, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.20007259271062952, |
| "grad_norm": 7.5, |
| "learning_rate": 9.738842681813573e-06, |
| "loss": 0.8350762939453125, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.2009578696695261, |
| "grad_norm": 7.75, |
| "learning_rate": 9.737574804746932e-06, |
| "loss": 0.835966796875, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.2018431466284227, |
| "grad_norm": 12.3125, |
| "learning_rate": 9.736306927680292e-06, |
| "loss": 0.85518798828125, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.2027284235873193, |
| "grad_norm": 6.84375, |
| "learning_rate": 9.735039050613653e-06, |
| "loss": 0.8386080169677734, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.2036137005462159, |
| "grad_norm": 6.1875, |
| "learning_rate": 9.733771173547014e-06, |
| "loss": 0.8899297332763672, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.20449897750511248, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.732503296480375e-06, |
| "loss": 0.8508304595947266, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.20538425446400907, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.731235419413734e-06, |
| "loss": 0.8747320556640625, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.20626953142290566, |
| "grad_norm": 9.5, |
| "learning_rate": 9.729967542347095e-06, |
| "loss": 0.8832579803466797, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.20715480838180225, |
| "grad_norm": 7.0, |
| "learning_rate": 9.728699665280456e-06, |
| "loss": 0.8430067443847656, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.20804008534069884, |
| "grad_norm": 8.625, |
| "learning_rate": 9.727431788213816e-06, |
| "loss": 0.9135202026367187, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.20892536229959544, |
| "grad_norm": 11.5, |
| "learning_rate": 9.726163911147176e-06, |
| "loss": 0.8933136749267578, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.20981063925849203, |
| "grad_norm": 9.25, |
| "learning_rate": 9.724896034080536e-06, |
| "loss": 0.8763120269775391, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.21069591621738862, |
| "grad_norm": 9.875, |
| "learning_rate": 9.723628157013897e-06, |
| "loss": 0.9074213409423828, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.2115811931762852, |
| "grad_norm": 8.0, |
| "learning_rate": 9.722360279947258e-06, |
| "loss": 0.8514747619628906, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.2124664701351818, |
| "grad_norm": 8.5, |
| "learning_rate": 9.721092402880617e-06, |
| "loss": 0.8526225280761719, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.2133517470940784, |
| "grad_norm": 8.5, |
| "learning_rate": 9.719824525813978e-06, |
| "loss": 0.8410261535644531, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.21423702405297498, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.718556648747339e-06, |
| "loss": 0.8436747741699219, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.21512230101187158, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.717288771680698e-06, |
| "loss": 0.8132114410400391, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.21600757797076817, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.716020894614059e-06, |
| "loss": 0.8950498962402343, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.21689285492966476, |
| "grad_norm": 9.375, |
| "learning_rate": 9.71475301754742e-06, |
| "loss": 0.8579686737060547, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.21777813188856135, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.71348514048078e-06, |
| "loss": 0.9161724853515625, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.21866340884745794, |
| "grad_norm": 6.65625, |
| "learning_rate": 9.71221726341414e-06, |
| "loss": 0.7839602661132813, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.2195486858063545, |
| "grad_norm": 7.78125, |
| "learning_rate": 9.7109493863475e-06, |
| "loss": 0.8397283935546875, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.2204339627652511, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.709681509280861e-06, |
| "loss": 0.8791749572753906, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.2213192397241477, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.70841363221422e-06, |
| "loss": 0.8308121490478516, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.22220451668304428, |
| "grad_norm": 6.34375, |
| "learning_rate": 9.707145755147581e-06, |
| "loss": 0.8770150756835937, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.22308979364194087, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.705877878080943e-06, |
| "loss": 0.8016796875, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.22397507060083746, |
| "grad_norm": 8.25, |
| "learning_rate": 9.704610001014303e-06, |
| "loss": 0.880452880859375, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.22486034755973405, |
| "grad_norm": 8.875, |
| "learning_rate": 9.703342123947663e-06, |
| "loss": 0.8713301849365235, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.22574562451863064, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.702074246881024e-06, |
| "loss": 0.8404985046386719, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.22663090147752724, |
| "grad_norm": 9.5, |
| "learning_rate": 9.700806369814383e-06, |
| "loss": 0.8488478088378906, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.22751617843642383, |
| "grad_norm": 6.15625, |
| "learning_rate": 9.699538492747744e-06, |
| "loss": 0.8541165161132812, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.22840145539532042, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.698270615681105e-06, |
| "loss": 0.8624703216552735, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.229286732354217, |
| "grad_norm": 7.6875, |
| "learning_rate": 9.697002738614466e-06, |
| "loss": 0.8818684387207031, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.2301720093131136, |
| "grad_norm": 9.125, |
| "learning_rate": 9.695734861547825e-06, |
| "loss": 0.8864445495605469, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.2310572862720102, |
| "grad_norm": 8.125, |
| "learning_rate": 9.694466984481186e-06, |
| "loss": 0.8283074951171875, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.23194256323090678, |
| "grad_norm": 7.6875, |
| "learning_rate": 9.693199107414546e-06, |
| "loss": 0.8552584075927734, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.23282784018980338, |
| "grad_norm": 9.5, |
| "learning_rate": 9.691931230347906e-06, |
| "loss": 0.8421883392333984, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.23371311714869997, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.690663353281266e-06, |
| "loss": 0.8718794250488281, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.23459839410759656, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.689395476214627e-06, |
| "loss": 0.8517426300048828, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.23548367106649315, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.688127599147988e-06, |
| "loss": 0.8756562042236328, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.23636894802538974, |
| "grad_norm": 9.625, |
| "learning_rate": 9.686859722081347e-06, |
| "loss": 0.8349308776855469, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.23725422498428633, |
| "grad_norm": 7.75, |
| "learning_rate": 9.685591845014708e-06, |
| "loss": 0.8565451049804688, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.23813950194318292, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.684323967948069e-06, |
| "loss": 0.8808267974853515, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.23902477890207952, |
| "grad_norm": 13.9375, |
| "learning_rate": 9.683056090881428e-06, |
| "loss": 0.7940772247314453, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.2399100558609761, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.681788213814789e-06, |
| "loss": 0.8729141998291016, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.2407953328198727, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.68052033674815e-06, |
| "loss": 0.8781705474853516, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.2416806097787693, |
| "grad_norm": 6.40625, |
| "learning_rate": 9.67925245968151e-06, |
| "loss": 0.895041732788086, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.24256588673766588, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.67798458261487e-06, |
| "loss": 0.8775433349609375, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.24345116369656247, |
| "grad_norm": 8.125, |
| "learning_rate": 9.67671670554823e-06, |
| "loss": 0.79046630859375, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.24433644065545906, |
| "grad_norm": 6.21875, |
| "learning_rate": 9.675448828481591e-06, |
| "loss": 0.8892935180664062, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.24522171761435566, |
| "grad_norm": 7.6875, |
| "learning_rate": 9.674180951414952e-06, |
| "loss": 0.8445626068115234, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.24610699457325225, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.672913074348313e-06, |
| "loss": 0.8559996795654297, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.24699227153214884, |
| "grad_norm": 10.25, |
| "learning_rate": 9.671645197281673e-06, |
| "loss": 0.889148941040039, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.24787754849104543, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.670377320215033e-06, |
| "loss": 0.81448486328125, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.24876282544994202, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.669109443148393e-06, |
| "loss": 0.8375322723388672, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.2496481024088386, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.667841566081754e-06, |
| "loss": 0.7957274627685547, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.2505333793677352, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.666573689015113e-06, |
| "loss": 0.8567101287841797, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.2514186563266318, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.665305811948474e-06, |
| "loss": 0.8208657073974609, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.25230393328552836, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.664037934881835e-06, |
| "loss": 0.8073037719726562, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.253189210244425, |
| "grad_norm": 8.125, |
| "learning_rate": 9.662770057815196e-06, |
| "loss": 0.8132960510253906, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.25407448720332154, |
| "grad_norm": 8.125, |
| "learning_rate": 9.661502180748555e-06, |
| "loss": 0.839927749633789, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.25495976416221816, |
| "grad_norm": 8.0, |
| "learning_rate": 9.660234303681916e-06, |
| "loss": 0.8778898620605469, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.2558450411211147, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.658966426615276e-06, |
| "loss": 0.833895492553711, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.25673031808001134, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.657698549548637e-06, |
| "loss": 0.8810472869873047, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.2576155950389079, |
| "grad_norm": 7.46875, |
| "learning_rate": 9.656430672481996e-06, |
| "loss": 0.7839117431640625, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.2585008719978045, |
| "grad_norm": 8.5, |
| "learning_rate": 9.655162795415357e-06, |
| "loss": 0.9003073120117188, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.2593861489567011, |
| "grad_norm": 8.375, |
| "learning_rate": 9.653894918348718e-06, |
| "loss": 0.8191262054443359, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.2602714259155977, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.652627041282077e-06, |
| "loss": 0.8235029602050781, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.2611567028744943, |
| "grad_norm": 7.75, |
| "learning_rate": 9.651359164215438e-06, |
| "loss": 0.8247006225585938, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.2620419798333909, |
| "grad_norm": 6.875, |
| "learning_rate": 9.650091287148799e-06, |
| "loss": 0.8273910522460938, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.26292725679228746, |
| "grad_norm": 8.25, |
| "learning_rate": 9.64882341008216e-06, |
| "loss": 0.7925537109375, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.2638125337511841, |
| "grad_norm": 6.78125, |
| "learning_rate": 9.647555533015519e-06, |
| "loss": 0.8129417419433593, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.26469781071008064, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.646287655948881e-06, |
| "loss": 0.7914369964599609, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.26558308766897726, |
| "grad_norm": 7.53125, |
| "learning_rate": 9.64501977888224e-06, |
| "loss": 0.8484162139892578, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.26558308766897726, |
| "eval_cer": 16.190625965955, |
| "eval_loss": 0.3607212007045746, |
| "eval_runtime": 393.1507, |
| "eval_samples_per_second": 12.718, |
| "eval_steps_per_second": 1.59, |
| "eval_wer": 32.338651938222355, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.2664683646278738, |
| "grad_norm": 7.75, |
| "learning_rate": 9.643751901815601e-06, |
| "loss": 0.9018843078613281, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.26735364158677044, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.642484024748962e-06, |
| "loss": 0.8514089965820313, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.268238918545667, |
| "grad_norm": 8.0, |
| "learning_rate": 9.641216147682323e-06, |
| "loss": 0.832979507446289, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.2691241955045636, |
| "grad_norm": 10.0, |
| "learning_rate": 9.639948270615682e-06, |
| "loss": 0.843365707397461, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.2700094724634602, |
| "grad_norm": 7.78125, |
| "learning_rate": 9.638680393549043e-06, |
| "loss": 0.8681787109375, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.2708947494223568, |
| "grad_norm": 9.25, |
| "learning_rate": 9.637412516482403e-06, |
| "loss": 0.8352089691162109, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.27178002638125337, |
| "grad_norm": 6.59375, |
| "learning_rate": 9.636144639415763e-06, |
| "loss": 0.890997543334961, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.27266530334015, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.634876762349123e-06, |
| "loss": 0.8376169586181641, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.27355058029904655, |
| "grad_norm": 9.75, |
| "learning_rate": 9.633608885282484e-06, |
| "loss": 0.8699169921875, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.27443585725794317, |
| "grad_norm": 8.625, |
| "learning_rate": 9.632341008215845e-06, |
| "loss": 0.8817887115478515, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.27532113421683974, |
| "grad_norm": 8.625, |
| "learning_rate": 9.631073131149204e-06, |
| "loss": 0.8531747436523438, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.27620641117573635, |
| "grad_norm": 7.09375, |
| "learning_rate": 9.629805254082565e-06, |
| "loss": 0.78208740234375, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2770916881346329, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.628537377015926e-06, |
| "loss": 0.7966637420654297, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.27797696509352954, |
| "grad_norm": 6.0, |
| "learning_rate": 9.627269499949285e-06, |
| "loss": 0.8817159271240235, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.2788622420524261, |
| "grad_norm": 9.25, |
| "learning_rate": 9.626001622882646e-06, |
| "loss": 0.8878803253173828, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.27974751901132267, |
| "grad_norm": 8.0, |
| "learning_rate": 9.624733745816006e-06, |
| "loss": 0.803402328491211, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.2806327959702193, |
| "grad_norm": 11.5625, |
| "learning_rate": 9.623465868749367e-06, |
| "loss": 0.8275116729736328, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.28151807292911585, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.622197991682726e-06, |
| "loss": 0.887125244140625, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.28240334988801247, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.620930114616087e-06, |
| "loss": 0.9281369781494141, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.28328862684690903, |
| "grad_norm": 8.125, |
| "learning_rate": 9.619662237549448e-06, |
| "loss": 0.7792628479003906, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.28417390380580565, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.618394360482807e-06, |
| "loss": 0.797691650390625, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.2850591807647022, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.61712648341617e-06, |
| "loss": 0.8892618560791016, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.28594445772359883, |
| "grad_norm": 10.875, |
| "learning_rate": 9.61585860634953e-06, |
| "loss": 0.8775372314453125, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.2868297346824954, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.61459072928289e-06, |
| "loss": 0.8037387084960937, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.287715011641392, |
| "grad_norm": 11.25, |
| "learning_rate": 9.61332285221625e-06, |
| "loss": 0.8707780456542968, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.2886002886002886, |
| "grad_norm": 8.25, |
| "learning_rate": 9.612054975149611e-06, |
| "loss": 0.8491946411132812, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.2894855655591852, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.61078709808297e-06, |
| "loss": 0.8137165069580078, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.29037084251808176, |
| "grad_norm": 13.0625, |
| "learning_rate": 9.609519221016331e-06, |
| "loss": 0.8208762359619141, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.2912561194769784, |
| "grad_norm": 8.75, |
| "learning_rate": 9.608251343949692e-06, |
| "loss": 0.8479267120361328, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.29214139643587494, |
| "grad_norm": 9.25, |
| "learning_rate": 9.606983466883053e-06, |
| "loss": 0.8337993621826172, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.29302667339477156, |
| "grad_norm": 8.75, |
| "learning_rate": 9.605715589816412e-06, |
| "loss": 0.8054754638671875, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.2939119503536681, |
| "grad_norm": 6.53125, |
| "learning_rate": 9.604447712749773e-06, |
| "loss": 0.7765091705322266, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.29479722731256475, |
| "grad_norm": 7.625, |
| "learning_rate": 9.603179835683133e-06, |
| "loss": 0.8374893188476562, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.2956825042714613, |
| "grad_norm": 9.625, |
| "learning_rate": 9.601911958616492e-06, |
| "loss": 0.8807014465332031, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.29656778123035793, |
| "grad_norm": 7.375, |
| "learning_rate": 9.600644081549853e-06, |
| "loss": 0.7978987884521485, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.2974530581892545, |
| "grad_norm": 9.75, |
| "learning_rate": 9.599376204483214e-06, |
| "loss": 0.8720764923095703, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.2983383351481511, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.598108327416575e-06, |
| "loss": 0.8416900634765625, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.2992236121070477, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.596840450349934e-06, |
| "loss": 0.8841259002685546, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.3001088890659443, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.595572573283295e-06, |
| "loss": 0.8410957336425782, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.30099416602484086, |
| "grad_norm": 10.75, |
| "learning_rate": 9.594304696216656e-06, |
| "loss": 0.8281800079345704, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.3018794429837375, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.593036819150016e-06, |
| "loss": 0.8017253875732422, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.30276471994263404, |
| "grad_norm": 10.75, |
| "learning_rate": 9.591768942083376e-06, |
| "loss": 0.7658233642578125, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.30364999690153066, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.590501065016736e-06, |
| "loss": 0.8435968780517578, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.3045352738604272, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.589233187950097e-06, |
| "loss": 0.8984470367431641, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.30542055081932384, |
| "grad_norm": 7.75, |
| "learning_rate": 9.587965310883458e-06, |
| "loss": 0.8200258636474609, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.3063058277782204, |
| "grad_norm": 7.09375, |
| "learning_rate": 9.586697433816819e-06, |
| "loss": 0.8967515563964844, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.307191104737117, |
| "grad_norm": 7.40625, |
| "learning_rate": 9.585429556750178e-06, |
| "loss": 0.81177734375, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.3080763816960136, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.584161679683539e-06, |
| "loss": 0.8331631469726563, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.3089616586549102, |
| "grad_norm": 7.6875, |
| "learning_rate": 9.5828938026169e-06, |
| "loss": 0.834053726196289, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.3098469356138068, |
| "grad_norm": 7.625, |
| "learning_rate": 9.58162592555026e-06, |
| "loss": 0.7964566040039063, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.3107322125727034, |
| "grad_norm": 9.0, |
| "learning_rate": 9.58035804848362e-06, |
| "loss": 0.8436365509033203, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.31161748953159996, |
| "grad_norm": 8.625, |
| "learning_rate": 9.57909017141698e-06, |
| "loss": 0.8416962432861328, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.3125027664904966, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.577822294350341e-06, |
| "loss": 0.7992512512207032, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.31338804344939314, |
| "grad_norm": 7.25, |
| "learning_rate": 9.576554417283702e-06, |
| "loss": 0.7787315368652343, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.31427332040828976, |
| "grad_norm": 10.25, |
| "learning_rate": 9.575286540217061e-06, |
| "loss": 0.8611086273193359, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.3151585973671863, |
| "grad_norm": 7.625, |
| "learning_rate": 9.574018663150422e-06, |
| "loss": 0.8016007995605469, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.31604387432608294, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.572750786083783e-06, |
| "loss": 0.8051242828369141, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.3169291512849795, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.571482909017142e-06, |
| "loss": 0.8546369171142578, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.3178144282438761, |
| "grad_norm": 9.75, |
| "learning_rate": 9.570215031950503e-06, |
| "loss": 0.8925285339355469, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.3186997052027727, |
| "grad_norm": 7.4375, |
| "learning_rate": 9.568947154883863e-06, |
| "loss": 0.852982177734375, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.31958498216166925, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.567679277817224e-06, |
| "loss": 0.8864115142822265, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.32047025912056587, |
| "grad_norm": 8.625, |
| "learning_rate": 9.566411400750583e-06, |
| "loss": 0.7933511352539062, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.32135553607946243, |
| "grad_norm": 6.71875, |
| "learning_rate": 9.565143523683944e-06, |
| "loss": 0.816483154296875, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.32224081303835905, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.563875646617305e-06, |
| "loss": 0.8137828063964844, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.3231260899972556, |
| "grad_norm": 7.5, |
| "learning_rate": 9.562607769550664e-06, |
| "loss": 0.8930496978759765, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.32401136695615224, |
| "grad_norm": 7.875, |
| "learning_rate": 9.561339892484025e-06, |
| "loss": 0.8350536346435546, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.3248966439150488, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.560072015417386e-06, |
| "loss": 0.7733663940429687, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.3257819208739454, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.558804138350746e-06, |
| "loss": 0.8673963165283203, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.326667197832842, |
| "grad_norm": 8.5, |
| "learning_rate": 9.557536261284107e-06, |
| "loss": 0.8465771484375, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.3275524747917386, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.556268384217468e-06, |
| "loss": 0.8279172515869141, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.32843775175063517, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.555000507150827e-06, |
| "loss": 0.8542655181884765, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.3293230287095318, |
| "grad_norm": 7.09375, |
| "learning_rate": 9.553732630084188e-06, |
| "loss": 0.8693686676025391, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.33020830566842835, |
| "grad_norm": 9.625, |
| "learning_rate": 9.552464753017549e-06, |
| "loss": 0.7420355224609375, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.33109358262732497, |
| "grad_norm": 9.75, |
| "learning_rate": 9.55119687595091e-06, |
| "loss": 0.8566489410400391, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.33197885958622153, |
| "grad_norm": 6.65625, |
| "learning_rate": 9.549928998884269e-06, |
| "loss": 0.8141315460205079, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.33286413654511815, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.54866112181763e-06, |
| "loss": 0.8337672424316406, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.3337494135040147, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.54739324475099e-06, |
| "loss": 0.8334447479248047, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.33463469046291133, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.54612536768435e-06, |
| "loss": 0.8280233764648437, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.3355199674218079, |
| "grad_norm": 8.875, |
| "learning_rate": 9.54485749061771e-06, |
| "loss": 0.8619183349609375, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.3364052443807045, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.543589613551071e-06, |
| "loss": 0.792462158203125, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.3372905213396011, |
| "grad_norm": 10.0, |
| "learning_rate": 9.542321736484432e-06, |
| "loss": 0.8707679748535156, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.3381757982984977, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.541053859417791e-06, |
| "loss": 0.8130400085449219, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.33906107525739426, |
| "grad_norm": 8.5, |
| "learning_rate": 9.539785982351152e-06, |
| "loss": 0.7939989471435547, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.3399463522162909, |
| "grad_norm": 7.46875, |
| "learning_rate": 9.538518105284513e-06, |
| "loss": 0.8232540893554687, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.34083162917518744, |
| "grad_norm": 9.0, |
| "learning_rate": 9.537250228217872e-06, |
| "loss": 0.8494704437255859, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.34171690613408406, |
| "grad_norm": 7.125, |
| "learning_rate": 9.535982351151233e-06, |
| "loss": 0.8644766235351562, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.34260218309298063, |
| "grad_norm": 8.75, |
| "learning_rate": 9.534714474084593e-06, |
| "loss": 0.8599738311767579, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.34348746005187725, |
| "grad_norm": 7.75, |
| "learning_rate": 9.533446597017954e-06, |
| "loss": 0.8332124328613282, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.3443727370107738, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.532178719951313e-06, |
| "loss": 0.8581776428222656, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.34525801396967043, |
| "grad_norm": 6.875, |
| "learning_rate": 9.530910842884674e-06, |
| "loss": 0.8054019927978515, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.346143290928567, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.529642965818035e-06, |
| "loss": 0.833067398071289, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.3470285678874636, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.528375088751396e-06, |
| "loss": 0.8120539855957031, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.3479138448463602, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.527107211684756e-06, |
| "loss": 0.7809496307373047, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.3487991218052568, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.525839334618117e-06, |
| "loss": 0.8102002716064454, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.34968439876415336, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.524571457551476e-06, |
| "loss": 0.8498989868164063, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.35056967572305, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.523303580484837e-06, |
| "loss": 0.8017991638183594, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.35145495268194654, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.522035703418198e-06, |
| "loss": 0.7946707153320313, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.35234022964084316, |
| "grad_norm": 7.03125, |
| "learning_rate": 9.520767826351557e-06, |
| "loss": 0.8487144470214844, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.3532255065997397, |
| "grad_norm": 9.375, |
| "learning_rate": 9.519499949284918e-06, |
| "loss": 0.7888392639160157, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.35411078355863634, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.518232072218279e-06, |
| "loss": 0.8027859497070312, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.35411078355863634, |
| "eval_cer": 16.348974799759223, |
| "eval_loss": 0.35486724972724915, |
| "eval_runtime": 393.2747, |
| "eval_samples_per_second": 12.714, |
| "eval_steps_per_second": 1.589, |
| "eval_wer": 32.70174900276158, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.3549960605175329, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.51696419515164e-06, |
| "loss": 0.8532256317138672, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.3558813374764295, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.515696318084999e-06, |
| "loss": 0.884788589477539, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.3567666144353261, |
| "grad_norm": 8.875, |
| "learning_rate": 9.51442844101836e-06, |
| "loss": 0.7734761047363281, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.3576518913942227, |
| "grad_norm": 8.125, |
| "learning_rate": 9.51316056395172e-06, |
| "loss": 0.8991136169433593, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.3585371683531193, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.511892686885081e-06, |
| "loss": 0.8061054229736329, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.35942244531201584, |
| "grad_norm": 8.0, |
| "learning_rate": 9.51062480981844e-06, |
| "loss": 0.8342051696777344, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.36030772227091246, |
| "grad_norm": 6.46875, |
| "learning_rate": 9.509356932751801e-06, |
| "loss": 0.8063926696777344, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.361192999229809, |
| "grad_norm": 8.875, |
| "learning_rate": 9.508089055685162e-06, |
| "loss": 0.9006285095214843, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.36207827618870564, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.506821178618521e-06, |
| "loss": 0.7908558654785156, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.3629635531476022, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.505553301551882e-06, |
| "loss": 0.8398319244384765, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.3638488301064988, |
| "grad_norm": 5.8125, |
| "learning_rate": 9.504285424485243e-06, |
| "loss": 0.8094285583496094, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.3647341070653954, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.503017547418603e-06, |
| "loss": 0.8495778656005859, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.365619384024292, |
| "grad_norm": 7.5, |
| "learning_rate": 9.501749670351963e-06, |
| "loss": 0.8105506134033204, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.36650466098318857, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.500481793285325e-06, |
| "loss": 0.7994151306152344, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.3673899379420852, |
| "grad_norm": 8.375, |
| "learning_rate": 9.499213916218684e-06, |
| "loss": 0.8393498992919922, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.36827521490098175, |
| "grad_norm": 10.6875, |
| "learning_rate": 9.497946039152045e-06, |
| "loss": 0.82013427734375, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.36916049185987837, |
| "grad_norm": 9.5, |
| "learning_rate": 9.496678162085406e-06, |
| "loss": 0.8179158782958984, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.37004576881877493, |
| "grad_norm": 12.875, |
| "learning_rate": 9.495410285018767e-06, |
| "loss": 0.7779678344726563, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.37093104577767155, |
| "grad_norm": 7.46875, |
| "learning_rate": 9.494142407952126e-06, |
| "loss": 0.8509443664550781, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.3718163227365681, |
| "grad_norm": 8.375, |
| "learning_rate": 9.492874530885486e-06, |
| "loss": 0.7994340515136719, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.37270159969546474, |
| "grad_norm": 8.875, |
| "learning_rate": 9.491606653818847e-06, |
| "loss": 0.8334093475341797, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.3735868766543613, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.490338776752206e-06, |
| "loss": 0.8529891967773438, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.3744721536132579, |
| "grad_norm": 9.125, |
| "learning_rate": 9.489070899685567e-06, |
| "loss": 0.7969075012207031, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.3753574305721545, |
| "grad_norm": 11.3125, |
| "learning_rate": 9.487803022618928e-06, |
| "loss": 0.8441764068603516, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.3762427075310511, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.486535145552289e-06, |
| "loss": 0.8383098602294922, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.37712798448994767, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.485267268485648e-06, |
| "loss": 0.8604541778564453, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.3780132614488443, |
| "grad_norm": 9.25, |
| "learning_rate": 9.483999391419009e-06, |
| "loss": 0.8413866424560547, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.37889853840774085, |
| "grad_norm": 9.625, |
| "learning_rate": 9.48273151435237e-06, |
| "loss": 0.7846895599365235, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.37978381536663747, |
| "grad_norm": 7.375, |
| "learning_rate": 9.481463637285729e-06, |
| "loss": 0.8002585601806641, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.38066909232553403, |
| "grad_norm": 8.0, |
| "learning_rate": 9.48019576021909e-06, |
| "loss": 0.8195709991455078, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.38155436928443065, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.47892788315245e-06, |
| "loss": 0.8092010498046875, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.3824396462433272, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.477660006085811e-06, |
| "loss": 0.8117552947998047, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.38332492320222383, |
| "grad_norm": 6.21875, |
| "learning_rate": 9.47639212901917e-06, |
| "loss": 0.8895623779296875, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.3842102001611204, |
| "grad_norm": 8.75, |
| "learning_rate": 9.475124251952531e-06, |
| "loss": 0.7886461639404296, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.385095477120017, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.473856374885892e-06, |
| "loss": 0.802349853515625, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.3859807540789136, |
| "grad_norm": 12.0625, |
| "learning_rate": 9.472588497819251e-06, |
| "loss": 0.8550609588623047, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.3868660310378102, |
| "grad_norm": 8.125, |
| "learning_rate": 9.471320620752612e-06, |
| "loss": 0.7605663299560547, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.38775130799670676, |
| "grad_norm": 7.53125, |
| "learning_rate": 9.470052743685974e-06, |
| "loss": 0.8628280639648438, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.3886365849556034, |
| "grad_norm": 8.625, |
| "learning_rate": 9.468784866619333e-06, |
| "loss": 0.8964498138427734, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.38952186191449995, |
| "grad_norm": 10.75, |
| "learning_rate": 9.467516989552694e-06, |
| "loss": 0.7898972320556641, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.39040713887339656, |
| "grad_norm": 6.375, |
| "learning_rate": 9.466249112486055e-06, |
| "loss": 0.8010289001464844, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.39129241583229313, |
| "grad_norm": 8.5, |
| "learning_rate": 9.464981235419414e-06, |
| "loss": 0.8129051208496094, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.39217769279118975, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.463713358352775e-06, |
| "loss": 0.8285366058349609, |
| "step": 22150 |
| }, |
| { |
| "epoch": 0.3930629697500863, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.462445481286136e-06, |
| "loss": 0.8392569732666015, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.39394824670898293, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.461177604219497e-06, |
| "loss": 0.8171870422363281, |
| "step": 22250 |
| }, |
| { |
| "epoch": 0.3948335236678795, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.459909727152856e-06, |
| "loss": 0.8369279479980469, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.3957188006267761, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.458641850086216e-06, |
| "loss": 0.7750814819335937, |
| "step": 22350 |
| }, |
| { |
| "epoch": 0.3966040775856727, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.457373973019577e-06, |
| "loss": 0.8476492309570313, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.3974893545445693, |
| "grad_norm": 8.375, |
| "learning_rate": 9.456106095952936e-06, |
| "loss": 0.8161160278320313, |
| "step": 22450 |
| }, |
| { |
| "epoch": 0.39837463150346586, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.454838218886297e-06, |
| "loss": 0.861170654296875, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.3992599084623624, |
| "grad_norm": 8.125, |
| "learning_rate": 9.453570341819658e-06, |
| "loss": 0.7938341522216796, |
| "step": 22550 |
| }, |
| { |
| "epoch": 0.40014518542125904, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.452302464753019e-06, |
| "loss": 0.7918325805664063, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.4010304623801556, |
| "grad_norm": 8.5, |
| "learning_rate": 9.451034587686378e-06, |
| "loss": 0.8244574737548828, |
| "step": 22650 |
| }, |
| { |
| "epoch": 0.4019157393390522, |
| "grad_norm": 7.75, |
| "learning_rate": 9.449766710619739e-06, |
| "loss": 0.759022216796875, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.4028010162979488, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.4484988335531e-06, |
| "loss": 0.7742694854736328, |
| "step": 22750 |
| }, |
| { |
| "epoch": 0.4036862932568454, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.447230956486459e-06, |
| "loss": 0.8123777008056641, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.40457157021574197, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.44596307941982e-06, |
| "loss": 0.7599580383300781, |
| "step": 22850 |
| }, |
| { |
| "epoch": 0.4054568471746386, |
| "grad_norm": 7.625, |
| "learning_rate": 9.44469520235318e-06, |
| "loss": 0.7971720123291015, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.40634212413353515, |
| "grad_norm": 9.625, |
| "learning_rate": 9.443427325286541e-06, |
| "loss": 0.7970821380615234, |
| "step": 22950 |
| }, |
| { |
| "epoch": 0.4072274010924318, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.4421594482199e-06, |
| "loss": 0.8227862548828125, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.40811267805132834, |
| "grad_norm": 8.25, |
| "learning_rate": 9.440891571153263e-06, |
| "loss": 0.8271920013427735, |
| "step": 23050 |
| }, |
| { |
| "epoch": 0.40899795501022496, |
| "grad_norm": 8.5, |
| "learning_rate": 9.439623694086622e-06, |
| "loss": 0.83154541015625, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.4098832319691215, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.438355817019983e-06, |
| "loss": 0.8107095336914063, |
| "step": 23150 |
| }, |
| { |
| "epoch": 0.41076850892801814, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.437087939953343e-06, |
| "loss": 0.8437236785888672, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.4116537858869147, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.435820062886704e-06, |
| "loss": 0.8471569061279297, |
| "step": 23250 |
| }, |
| { |
| "epoch": 0.4125390628458113, |
| "grad_norm": 7.875, |
| "learning_rate": 9.434552185820063e-06, |
| "loss": 0.885667724609375, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.4134243398047079, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.433284308753424e-06, |
| "loss": 0.8514400482177734, |
| "step": 23350 |
| }, |
| { |
| "epoch": 0.4143096167636045, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.432016431686785e-06, |
| "loss": 0.7581684112548828, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.41519489372250107, |
| "grad_norm": 10.0, |
| "learning_rate": 9.430748554620146e-06, |
| "loss": 0.824618911743164, |
| "step": 23450 |
| }, |
| { |
| "epoch": 0.4160801706813977, |
| "grad_norm": 7.59375, |
| "learning_rate": 9.429480677553505e-06, |
| "loss": 0.8305864715576172, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.41696544764029425, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.428212800486866e-06, |
| "loss": 0.8161003875732422, |
| "step": 23550 |
| }, |
| { |
| "epoch": 0.41785072459919087, |
| "grad_norm": 12.0, |
| "learning_rate": 9.426944923420227e-06, |
| "loss": 0.8694761657714843, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.41873600155808743, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.425677046353586e-06, |
| "loss": 0.8649164581298828, |
| "step": 23650 |
| }, |
| { |
| "epoch": 0.41962127851698405, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.424409169286946e-06, |
| "loss": 0.8016709899902343, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.4205065554758806, |
| "grad_norm": 6.625, |
| "learning_rate": 9.423141292220307e-06, |
| "loss": 0.8073011016845704, |
| "step": 23750 |
| }, |
| { |
| "epoch": 0.42139183243477724, |
| "grad_norm": 8.5, |
| "learning_rate": 9.421873415153668e-06, |
| "loss": 0.8519166564941406, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.4222771093936738, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.420605538087027e-06, |
| "loss": 0.7963951110839844, |
| "step": 23850 |
| }, |
| { |
| "epoch": 0.4231623863525704, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.419337661020388e-06, |
| "loss": 0.8319783020019531, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.424047663311467, |
| "grad_norm": 8.5, |
| "learning_rate": 9.418069783953749e-06, |
| "loss": 0.8024713134765625, |
| "step": 23950 |
| }, |
| { |
| "epoch": 0.4249329402703636, |
| "grad_norm": 6.5, |
| "learning_rate": 9.416801906887108e-06, |
| "loss": 0.8116042327880859, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.42581821722926017, |
| "grad_norm": 5.96875, |
| "learning_rate": 9.415534029820469e-06, |
| "loss": 0.8232134246826172, |
| "step": 24050 |
| }, |
| { |
| "epoch": 0.4267034941881568, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.41426615275383e-06, |
| "loss": 0.8045470428466797, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.42758877114705335, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.41299827568719e-06, |
| "loss": 0.8654727935791016, |
| "step": 24150 |
| }, |
| { |
| "epoch": 0.42847404810594997, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.411730398620551e-06, |
| "loss": 0.8151998138427734, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.42935932506484653, |
| "grad_norm": 7.1875, |
| "learning_rate": 9.410462521553912e-06, |
| "loss": 0.8104602813720703, |
| "step": 24250 |
| }, |
| { |
| "epoch": 0.43024460202374315, |
| "grad_norm": 8.625, |
| "learning_rate": 9.409194644487271e-06, |
| "loss": 0.8054197692871093, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.4311298789826397, |
| "grad_norm": 7.40625, |
| "learning_rate": 9.407926767420632e-06, |
| "loss": 0.8097126770019532, |
| "step": 24350 |
| }, |
| { |
| "epoch": 0.43201515594153633, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.406658890353993e-06, |
| "loss": 0.7878807067871094, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.4329004329004329, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.405391013287353e-06, |
| "loss": 0.8757616424560547, |
| "step": 24450 |
| }, |
| { |
| "epoch": 0.4337857098593295, |
| "grad_norm": 7.53125, |
| "learning_rate": 9.404123136220713e-06, |
| "loss": 0.7997799682617187, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.4346709868182261, |
| "grad_norm": 11.0625, |
| "learning_rate": 9.402855259154073e-06, |
| "loss": 0.7962652587890625, |
| "step": 24550 |
| }, |
| { |
| "epoch": 0.4355562637771227, |
| "grad_norm": 8.25, |
| "learning_rate": 9.401587382087434e-06, |
| "loss": 0.7983963775634766, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.43644154073601926, |
| "grad_norm": 6.34375, |
| "learning_rate": 9.400319505020793e-06, |
| "loss": 0.8001494598388672, |
| "step": 24650 |
| }, |
| { |
| "epoch": 0.4373268176949159, |
| "grad_norm": 9.5, |
| "learning_rate": 9.399051627954154e-06, |
| "loss": 0.7819596099853515, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.43821209465381245, |
| "grad_norm": 8.0625, |
| "learning_rate": 9.397783750887515e-06, |
| "loss": 0.8707284545898437, |
| "step": 24750 |
| }, |
| { |
| "epoch": 0.439097371612709, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.396515873820876e-06, |
| "loss": 0.805533447265625, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.43998264857160563, |
| "grad_norm": 6.46875, |
| "learning_rate": 9.395247996754235e-06, |
| "loss": 0.8091240692138671, |
| "step": 24850 |
| }, |
| { |
| "epoch": 0.4408679255305022, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.393980119687596e-06, |
| "loss": 0.7720470428466797, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.4417532024893988, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.392712242620956e-06, |
| "loss": 0.7984862518310547, |
| "step": 24950 |
| }, |
| { |
| "epoch": 0.4426384794482954, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.391444365554316e-06, |
| "loss": 0.85141845703125, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.4426384794482954, |
| "eval_cer": 15.512762807546515, |
| "eval_loss": 0.3506615161895752, |
| "eval_runtime": 388.5865, |
| "eval_samples_per_second": 12.867, |
| "eval_steps_per_second": 1.608, |
| "eval_wer": 31.382325866830318, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.443523756407192, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.390176488487676e-06, |
| "loss": 0.799303207397461, |
| "step": 25050 |
| }, |
| { |
| "epoch": 0.44440903336608856, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.388908611421037e-06, |
| "loss": 0.7852619934082031, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.4452943103249852, |
| "grad_norm": 9.125, |
| "learning_rate": 9.387640734354398e-06, |
| "loss": 0.7992433166503906, |
| "step": 25150 |
| }, |
| { |
| "epoch": 0.44617958728388174, |
| "grad_norm": 7.5625, |
| "learning_rate": 9.386372857287757e-06, |
| "loss": 0.8536985015869141, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.44706486424277836, |
| "grad_norm": 8.375, |
| "learning_rate": 9.385104980221118e-06, |
| "loss": 0.7951334381103515, |
| "step": 25250 |
| }, |
| { |
| "epoch": 0.4479501412016749, |
| "grad_norm": 6.15625, |
| "learning_rate": 9.383837103154479e-06, |
| "loss": 0.8157552337646484, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.44883541816057154, |
| "grad_norm": 7.875, |
| "learning_rate": 9.38256922608784e-06, |
| "loss": 0.8191168212890625, |
| "step": 25350 |
| }, |
| { |
| "epoch": 0.4497206951194681, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.3813013490212e-06, |
| "loss": 0.7932091522216796, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.4506059720783647, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.380033471954561e-06, |
| "loss": 0.8196167755126953, |
| "step": 25450 |
| }, |
| { |
| "epoch": 0.4514912490372613, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.37876559488792e-06, |
| "loss": 0.80684326171875, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.4523765259961579, |
| "grad_norm": 8.375, |
| "learning_rate": 9.377497717821281e-06, |
| "loss": 0.8326478576660157, |
| "step": 25550 |
| }, |
| { |
| "epoch": 0.45326180295505447, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.376229840754642e-06, |
| "loss": 0.8006377410888672, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.4541470799139511, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.374961963688001e-06, |
| "loss": 0.8118048858642578, |
| "step": 25650 |
| }, |
| { |
| "epoch": 0.45503235687284765, |
| "grad_norm": 6.75, |
| "learning_rate": 9.373694086621362e-06, |
| "loss": 0.8060718536376953, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.4559176338317443, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.372426209554723e-06, |
| "loss": 0.7967828369140625, |
| "step": 25750 |
| }, |
| { |
| "epoch": 0.45680291079064084, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.371158332488083e-06, |
| "loss": 0.8676046752929687, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.45768818774953746, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.369890455421443e-06, |
| "loss": 0.83003662109375, |
| "step": 25850 |
| }, |
| { |
| "epoch": 0.458573464708434, |
| "grad_norm": 7.84375, |
| "learning_rate": 9.368622578354803e-06, |
| "loss": 0.7804559326171875, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.45945874166733064, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.367354701288164e-06, |
| "loss": 0.8414221954345703, |
| "step": 25950 |
| }, |
| { |
| "epoch": 0.4603440186262272, |
| "grad_norm": 10.0625, |
| "learning_rate": 9.366086824221523e-06, |
| "loss": 0.8436249542236328, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.4612292955851238, |
| "grad_norm": 9.75, |
| "learning_rate": 9.364818947154884e-06, |
| "loss": 0.8198944091796875, |
| "step": 26050 |
| }, |
| { |
| "epoch": 0.4621145725440204, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.363551070088245e-06, |
| "loss": 0.822802505493164, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.462999849502917, |
| "grad_norm": 9.5, |
| "learning_rate": 9.362283193021606e-06, |
| "loss": 0.8616585540771484, |
| "step": 26150 |
| }, |
| { |
| "epoch": 0.46388512646181357, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.361015315954965e-06, |
| "loss": 0.874574203491211, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.4647704034207102, |
| "grad_norm": 8.25, |
| "learning_rate": 9.359747438888326e-06, |
| "loss": 0.8157247161865234, |
| "step": 26250 |
| }, |
| { |
| "epoch": 0.46565568037960675, |
| "grad_norm": 9.5, |
| "learning_rate": 9.358479561821686e-06, |
| "loss": 0.8261668395996093, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.46654095733850337, |
| "grad_norm": 9.25, |
| "learning_rate": 9.357211684755047e-06, |
| "loss": 0.7921039581298828, |
| "step": 26350 |
| }, |
| { |
| "epoch": 0.46742623429739993, |
| "grad_norm": 8.125, |
| "learning_rate": 9.355943807688406e-06, |
| "loss": 0.8063948059082031, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.46831151125629655, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.354675930621767e-06, |
| "loss": 0.8948915100097656, |
| "step": 26450 |
| }, |
| { |
| "epoch": 0.4691967882151931, |
| "grad_norm": 8.375, |
| "learning_rate": 9.353408053555128e-06, |
| "loss": 0.8333416748046875, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.47008206517408974, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.352140176488489e-06, |
| "loss": 0.8180615997314453, |
| "step": 26550 |
| }, |
| { |
| "epoch": 0.4709673421329863, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.35087229942185e-06, |
| "loss": 0.8063065338134766, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.4718526190918829, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.34960442235521e-06, |
| "loss": 0.7838368225097656, |
| "step": 26650 |
| }, |
| { |
| "epoch": 0.4727378960507795, |
| "grad_norm": 9.5, |
| "learning_rate": 9.34833654528857e-06, |
| "loss": 0.7889875030517578, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.4736231730096761, |
| "grad_norm": 8.875, |
| "learning_rate": 9.34706866822193e-06, |
| "loss": 0.8149432373046875, |
| "step": 26750 |
| }, |
| { |
| "epoch": 0.47450844996857267, |
| "grad_norm": 7.59375, |
| "learning_rate": 9.345800791155291e-06, |
| "loss": 0.8343911743164063, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.4753937269274693, |
| "grad_norm": 6.09375, |
| "learning_rate": 9.34453291408865e-06, |
| "loss": 0.8231613159179687, |
| "step": 26850 |
| }, |
| { |
| "epoch": 0.47627900388636585, |
| "grad_norm": 8.875, |
| "learning_rate": 9.343265037022011e-06, |
| "loss": 0.8221437835693359, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.47716428084526247, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.341997159955372e-06, |
| "loss": 0.8173371124267578, |
| "step": 26950 |
| }, |
| { |
| "epoch": 0.47804955780415903, |
| "grad_norm": 8.25, |
| "learning_rate": 9.340729282888733e-06, |
| "loss": 0.8692239379882812, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.4789348347630556, |
| "grad_norm": 6.40625, |
| "learning_rate": 9.339461405822092e-06, |
| "loss": 0.7678931427001953, |
| "step": 27050 |
| }, |
| { |
| "epoch": 0.4798201117219522, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.338193528755453e-06, |
| "loss": 0.8847496032714843, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.4807053886808488, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.336925651688813e-06, |
| "loss": 0.8440608978271484, |
| "step": 27150 |
| }, |
| { |
| "epoch": 0.4815906656397454, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.335657774622173e-06, |
| "loss": 0.7591238403320313, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.48247594259864196, |
| "grad_norm": 7.4375, |
| "learning_rate": 9.334389897555533e-06, |
| "loss": 0.8198709106445312, |
| "step": 27250 |
| }, |
| { |
| "epoch": 0.4833612195575386, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.333122020488894e-06, |
| "loss": 0.7931713104248047, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.48424649651643514, |
| "grad_norm": 9.25, |
| "learning_rate": 9.331854143422255e-06, |
| "loss": 0.8829562377929687, |
| "step": 27350 |
| }, |
| { |
| "epoch": 0.48513177347533176, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.330586266355614e-06, |
| "loss": 0.857093734741211, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.4860170504342283, |
| "grad_norm": 8.25, |
| "learning_rate": 9.329318389288975e-06, |
| "loss": 0.8017523193359375, |
| "step": 27450 |
| }, |
| { |
| "epoch": 0.48690232739312495, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.328050512222336e-06, |
| "loss": 0.8276552581787109, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.4877876043520215, |
| "grad_norm": 8.125, |
| "learning_rate": 9.326782635155695e-06, |
| "loss": 0.7625294494628906, |
| "step": 27550 |
| }, |
| { |
| "epoch": 0.48867288131091813, |
| "grad_norm": 6.4375, |
| "learning_rate": 9.325514758089056e-06, |
| "loss": 0.8343047332763672, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.4895581582698147, |
| "grad_norm": 6.59375, |
| "learning_rate": 9.324246881022418e-06, |
| "loss": 0.8407593536376953, |
| "step": 27650 |
| }, |
| { |
| "epoch": 0.4904434352287113, |
| "grad_norm": 9.75, |
| "learning_rate": 9.322979003955777e-06, |
| "loss": 0.7998301696777343, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.4913287121876079, |
| "grad_norm": 9.125, |
| "learning_rate": 9.321711126889138e-06, |
| "loss": 0.8573383331298828, |
| "step": 27750 |
| }, |
| { |
| "epoch": 0.4922139891465045, |
| "grad_norm": 10.875, |
| "learning_rate": 9.320443249822499e-06, |
| "loss": 0.8192384338378906, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.49309926610540106, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.319175372755858e-06, |
| "loss": 0.7779326629638672, |
| "step": 27850 |
| }, |
| { |
| "epoch": 0.4939845430642977, |
| "grad_norm": 7.90625, |
| "learning_rate": 9.317907495689219e-06, |
| "loss": 0.8377117156982422, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.49486982002319424, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.31663961862258e-06, |
| "loss": 0.8032022094726563, |
| "step": 27950 |
| }, |
| { |
| "epoch": 0.49575509698209086, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.31537174155594e-06, |
| "loss": 0.8107691955566406, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.4966403739409874, |
| "grad_norm": 7.75, |
| "learning_rate": 9.3141038644893e-06, |
| "loss": 0.8325393676757813, |
| "step": 28050 |
| }, |
| { |
| "epoch": 0.49752565089988404, |
| "grad_norm": 9.625, |
| "learning_rate": 9.31283598742266e-06, |
| "loss": 0.80835693359375, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.4984109278587806, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.311568110356021e-06, |
| "loss": 0.8398920440673828, |
| "step": 28150 |
| }, |
| { |
| "epoch": 0.4992962048176772, |
| "grad_norm": 9.5, |
| "learning_rate": 9.31030023328938e-06, |
| "loss": 0.8618205261230468, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.5001814817765738, |
| "grad_norm": 7.3125, |
| "learning_rate": 9.309032356222741e-06, |
| "loss": 0.7820648956298828, |
| "step": 28250 |
| }, |
| { |
| "epoch": 0.5010667587354704, |
| "grad_norm": 5.96875, |
| "learning_rate": 9.307764479156102e-06, |
| "loss": 0.8232012176513672, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.501952035694367, |
| "grad_norm": 7.15625, |
| "learning_rate": 9.306496602089463e-06, |
| "loss": 0.7935179138183593, |
| "step": 28350 |
| }, |
| { |
| "epoch": 0.5028373126532636, |
| "grad_norm": 7.75, |
| "learning_rate": 9.305228725022822e-06, |
| "loss": 0.812857666015625, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.5037225896121602, |
| "grad_norm": 6.75, |
| "learning_rate": 9.303960847956183e-06, |
| "loss": 0.7825227355957032, |
| "step": 28450 |
| }, |
| { |
| "epoch": 0.5046078665710567, |
| "grad_norm": 6.875, |
| "learning_rate": 9.302692970889543e-06, |
| "loss": 0.8190470886230469, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.5054931435299533, |
| "grad_norm": 6.28125, |
| "learning_rate": 9.301425093822903e-06, |
| "loss": 0.855802993774414, |
| "step": 28550 |
| }, |
| { |
| "epoch": 0.50637842048885, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.300157216756263e-06, |
| "loss": 0.848614501953125, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.5072636974477466, |
| "grad_norm": 8.125, |
| "learning_rate": 9.298889339689624e-06, |
| "loss": 0.7645280456542969, |
| "step": 28650 |
| }, |
| { |
| "epoch": 0.5081489744066431, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.297621462622985e-06, |
| "loss": 0.8297785949707032, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.5090342513655397, |
| "grad_norm": 6.40625, |
| "learning_rate": 9.296353585556344e-06, |
| "loss": 0.8629362487792969, |
| "step": 28750 |
| }, |
| { |
| "epoch": 0.5099195283244363, |
| "grad_norm": 9.375, |
| "learning_rate": 9.295085708489707e-06, |
| "loss": 0.8396317291259766, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.5108048052833329, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.293817831423066e-06, |
| "loss": 0.8256559753417969, |
| "step": 28850 |
| }, |
| { |
| "epoch": 0.5116900822422294, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.292549954356427e-06, |
| "loss": 0.8319975280761719, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.5125753592011261, |
| "grad_norm": 8.0, |
| "learning_rate": 9.291282077289787e-06, |
| "loss": 0.7743982696533203, |
| "step": 28950 |
| }, |
| { |
| "epoch": 0.5134606361600227, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.290014200223148e-06, |
| "loss": 0.8560353088378906, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.5143459131189193, |
| "grad_norm": 9.25, |
| "learning_rate": 9.288746323156507e-06, |
| "loss": 0.872972640991211, |
| "step": 29050 |
| }, |
| { |
| "epoch": 0.5152311900778158, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.287478446089868e-06, |
| "loss": 0.8381455230712891, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.5161164670367124, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.286210569023229e-06, |
| "loss": 0.7603321838378906, |
| "step": 29150 |
| }, |
| { |
| "epoch": 0.517001743995609, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.284942691956588e-06, |
| "loss": 0.8029882049560547, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.5178870209545057, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.283674814889949e-06, |
| "loss": 0.8445874786376953, |
| "step": 29250 |
| }, |
| { |
| "epoch": 0.5187722979134022, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.28240693782331e-06, |
| "loss": 0.806130142211914, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.5196575748722988, |
| "grad_norm": 8.625, |
| "learning_rate": 9.28113906075667e-06, |
| "loss": 0.8128162384033203, |
| "step": 29350 |
| }, |
| { |
| "epoch": 0.5205428518311954, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.27987118369003e-06, |
| "loss": 0.8138497161865235, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.5214281287900919, |
| "grad_norm": 8.875, |
| "learning_rate": 9.27860330662339e-06, |
| "loss": 0.7841197967529296, |
| "step": 29450 |
| }, |
| { |
| "epoch": 0.5223134057489885, |
| "grad_norm": 7.46875, |
| "learning_rate": 9.277335429556751e-06, |
| "loss": 0.8085577392578125, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.5231986827078852, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.276067552490112e-06, |
| "loss": 0.8392274475097656, |
| "step": 29550 |
| }, |
| { |
| "epoch": 0.5240839596667818, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.274799675423471e-06, |
| "loss": 0.8423346710205079, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.5249692366256783, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.273531798356832e-06, |
| "loss": 0.7886857604980468, |
| "step": 29650 |
| }, |
| { |
| "epoch": 0.5258545135845749, |
| "grad_norm": 9.5, |
| "learning_rate": 9.272263921290193e-06, |
| "loss": 0.8614559936523437, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.5267397905434715, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.270996044223552e-06, |
| "loss": 0.7886000061035157, |
| "step": 29750 |
| }, |
| { |
| "epoch": 0.5276250675023682, |
| "grad_norm": 9.0, |
| "learning_rate": 9.269728167156913e-06, |
| "loss": 0.8309735870361328, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.5285103444612647, |
| "grad_norm": 10.25, |
| "learning_rate": 9.268460290090273e-06, |
| "loss": 0.8716236877441407, |
| "step": 29850 |
| }, |
| { |
| "epoch": 0.5293956214201613, |
| "grad_norm": 8.625, |
| "learning_rate": 9.267192413023634e-06, |
| "loss": 0.8325408935546875, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.5302808983790579, |
| "grad_norm": 6.96875, |
| "learning_rate": 9.265924535956993e-06, |
| "loss": 0.7999062347412109, |
| "step": 29950 |
| }, |
| { |
| "epoch": 0.5311661753379545, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.264656658890356e-06, |
| "loss": 0.8242444610595703, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.5311661753379545, |
| "eval_cer": 14.722645510105584, |
| "eval_loss": 0.3480595052242279, |
| "eval_runtime": 379.8738, |
| "eval_samples_per_second": 13.162, |
| "eval_steps_per_second": 1.645, |
| "eval_wer": 30.56407896082643, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.532051452296851, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.263388781823715e-06, |
| "loss": 0.9031015777587891, |
| "step": 30050 |
| }, |
| { |
| "epoch": 0.5329367292557476, |
| "grad_norm": 13.3125, |
| "learning_rate": 9.262120904757076e-06, |
| "loss": 0.8271858978271485, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.5338220062146443, |
| "grad_norm": 7.3125, |
| "learning_rate": 9.260853027690437e-06, |
| "loss": 0.7925968170166016, |
| "step": 30150 |
| }, |
| { |
| "epoch": 0.5347072831735409, |
| "grad_norm": 10.25, |
| "learning_rate": 9.259585150623797e-06, |
| "loss": 0.8345429229736329, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.5355925601324374, |
| "grad_norm": 9.625, |
| "learning_rate": 9.258317273557156e-06, |
| "loss": 0.8198999786376953, |
| "step": 30250 |
| }, |
| { |
| "epoch": 0.536477837091334, |
| "grad_norm": 8.0, |
| "learning_rate": 9.257049396490517e-06, |
| "loss": 0.8118746185302734, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.5373631140502306, |
| "grad_norm": 9.125, |
| "learning_rate": 9.255781519423878e-06, |
| "loss": 0.7819632720947266, |
| "step": 30350 |
| }, |
| { |
| "epoch": 0.5382483910091272, |
| "grad_norm": 8.125, |
| "learning_rate": 9.254513642357237e-06, |
| "loss": 0.84641845703125, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.5391336679680238, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.253245765290598e-06, |
| "loss": 0.8433683776855468, |
| "step": 30450 |
| }, |
| { |
| "epoch": 0.5400189449269204, |
| "grad_norm": 9.375, |
| "learning_rate": 9.251977888223959e-06, |
| "loss": 0.7750820922851562, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.540904221885817, |
| "grad_norm": 8.75, |
| "learning_rate": 9.25071001115732e-06, |
| "loss": 0.8537428283691406, |
| "step": 30550 |
| }, |
| { |
| "epoch": 0.5417894988447136, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.249442134090679e-06, |
| "loss": 0.8174424743652344, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.5426747758036101, |
| "grad_norm": 7.28125, |
| "learning_rate": 9.24817425702404e-06, |
| "loss": 0.7803053283691406, |
| "step": 30650 |
| }, |
| { |
| "epoch": 0.5435600527625067, |
| "grad_norm": 8.875, |
| "learning_rate": 9.2469063799574e-06, |
| "loss": 0.8024676513671875, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.5444453297214034, |
| "grad_norm": 8.125, |
| "learning_rate": 9.24563850289076e-06, |
| "loss": 0.830534439086914, |
| "step": 30750 |
| }, |
| { |
| "epoch": 0.5453306066803, |
| "grad_norm": 8.25, |
| "learning_rate": 9.24437062582412e-06, |
| "loss": 0.8581230926513672, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.5462158836391965, |
| "grad_norm": 9.25, |
| "learning_rate": 9.243102748757481e-06, |
| "loss": 0.8423960876464843, |
| "step": 30850 |
| }, |
| { |
| "epoch": 0.5471011605980931, |
| "grad_norm": 6.78125, |
| "learning_rate": 9.241834871690842e-06, |
| "loss": 0.8226587677001953, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.5479864375569897, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.240566994624201e-06, |
| "loss": 0.7333302307128906, |
| "step": 30950 |
| }, |
| { |
| "epoch": 0.5488717145158863, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.239299117557562e-06, |
| "loss": 0.8279609680175781, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.5497569914747829, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.238031240490923e-06, |
| "loss": 0.8205570983886719, |
| "step": 31050 |
| }, |
| { |
| "epoch": 0.5506422684336795, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.236763363424282e-06, |
| "loss": 0.7977956390380859, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.5515275453925761, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.235495486357644e-06, |
| "loss": 0.7922000885009766, |
| "step": 31150 |
| }, |
| { |
| "epoch": 0.5524128223514727, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.234227609291005e-06, |
| "loss": 0.8065330505371093, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.5532980993103692, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.232959732224364e-06, |
| "loss": 0.766522445678711, |
| "step": 31250 |
| }, |
| { |
| "epoch": 0.5541833762692658, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.231691855157725e-06, |
| "loss": 0.7584080505371094, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.5550686532281625, |
| "grad_norm": 7.8125, |
| "learning_rate": 9.230423978091086e-06, |
| "loss": 0.835227279663086, |
| "step": 31350 |
| }, |
| { |
| "epoch": 0.5559539301870591, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.229156101024445e-06, |
| "loss": 0.8134999084472656, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.5568392071459556, |
| "grad_norm": 8.625, |
| "learning_rate": 9.227888223957806e-06, |
| "loss": 0.8568966674804688, |
| "step": 31450 |
| }, |
| { |
| "epoch": 0.5577244841048522, |
| "grad_norm": 9.1875, |
| "learning_rate": 9.226620346891167e-06, |
| "loss": 0.8286125946044922, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.5586097610637488, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.225352469824527e-06, |
| "loss": 0.806230697631836, |
| "step": 31550 |
| }, |
| { |
| "epoch": 0.5594950380226453, |
| "grad_norm": 7.0625, |
| "learning_rate": 9.224084592757886e-06, |
| "loss": 0.8486292266845703, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.560380314981542, |
| "grad_norm": 9.875, |
| "learning_rate": 9.222816715691247e-06, |
| "loss": 0.8534250640869141, |
| "step": 31650 |
| }, |
| { |
| "epoch": 0.5612655919404386, |
| "grad_norm": 9.0, |
| "learning_rate": 9.221548838624608e-06, |
| "loss": 0.7652369689941406, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.5621508688993352, |
| "grad_norm": 9.75, |
| "learning_rate": 9.220280961557967e-06, |
| "loss": 0.8141221618652343, |
| "step": 31750 |
| }, |
| { |
| "epoch": 0.5630361458582317, |
| "grad_norm": 7.75, |
| "learning_rate": 9.219013084491328e-06, |
| "loss": 0.7593769073486328, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.5639214228171283, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.217745207424689e-06, |
| "loss": 0.89138671875, |
| "step": 31850 |
| }, |
| { |
| "epoch": 0.5648066997760249, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.21647733035805e-06, |
| "loss": 0.7844825744628906, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.5656919767349216, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.215209453291409e-06, |
| "loss": 0.7966393280029297, |
| "step": 31950 |
| }, |
| { |
| "epoch": 0.5665772536938181, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.21394157622477e-06, |
| "loss": 0.7827631378173828, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.5674625306527147, |
| "grad_norm": 9.0, |
| "learning_rate": 9.21267369915813e-06, |
| "loss": 0.8383135986328125, |
| "step": 32050 |
| }, |
| { |
| "epoch": 0.5683478076116113, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.211405822091491e-06, |
| "loss": 0.8304233551025391, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.5692330845705079, |
| "grad_norm": 9.5, |
| "learning_rate": 9.21013794502485e-06, |
| "loss": 0.8089006042480469, |
| "step": 32150 |
| }, |
| { |
| "epoch": 0.5701183615294044, |
| "grad_norm": 6.40625, |
| "learning_rate": 9.208870067958211e-06, |
| "loss": 0.8643374633789063, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.571003638488301, |
| "grad_norm": 8.5, |
| "learning_rate": 9.207602190891572e-06, |
| "loss": 0.87065185546875, |
| "step": 32250 |
| }, |
| { |
| "epoch": 0.5718889154471977, |
| "grad_norm": 8.375, |
| "learning_rate": 9.206334313824933e-06, |
| "loss": 0.8011983489990234, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.5727741924060943, |
| "grad_norm": 9.5, |
| "learning_rate": 9.205066436758294e-06, |
| "loss": 0.8257938385009765, |
| "step": 32350 |
| }, |
| { |
| "epoch": 0.5736594693649908, |
| "grad_norm": 9.25, |
| "learning_rate": 9.203798559691654e-06, |
| "loss": 0.7791096496582032, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.5745447463238874, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.202530682625013e-06, |
| "loss": 0.8195112609863281, |
| "step": 32450 |
| }, |
| { |
| "epoch": 0.575430023282784, |
| "grad_norm": 8.25, |
| "learning_rate": 9.201262805558374e-06, |
| "loss": 0.8126992797851562, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.5763153002416807, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.199994928491735e-06, |
| "loss": 0.8560022735595703, |
| "step": 32550 |
| }, |
| { |
| "epoch": 0.5772005772005772, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.198727051425094e-06, |
| "loss": 0.8273929595947266, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.5780858541594738, |
| "grad_norm": 9.25, |
| "learning_rate": 9.197459174358455e-06, |
| "loss": 0.8422256469726562, |
| "step": 32650 |
| }, |
| { |
| "epoch": 0.5789711311183704, |
| "grad_norm": 7.625, |
| "learning_rate": 9.196191297291816e-06, |
| "loss": 0.7996526336669922, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.579856408077267, |
| "grad_norm": 9.125, |
| "learning_rate": 9.194923420225177e-06, |
| "loss": 0.8200563812255859, |
| "step": 32750 |
| }, |
| { |
| "epoch": 0.5807416850361635, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.193655543158536e-06, |
| "loss": 0.7688130187988281, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.5816269619950601, |
| "grad_norm": 7.625, |
| "learning_rate": 9.192387666091897e-06, |
| "loss": 0.8218675231933594, |
| "step": 32850 |
| }, |
| { |
| "epoch": 0.5825122389539568, |
| "grad_norm": 10.5625, |
| "learning_rate": 9.191119789025257e-06, |
| "loss": 0.8125396728515625, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.5833975159128534, |
| "grad_norm": 9.0, |
| "learning_rate": 9.189851911958616e-06, |
| "loss": 0.7877046966552734, |
| "step": 32950 |
| }, |
| { |
| "epoch": 0.5842827928717499, |
| "grad_norm": 10.5, |
| "learning_rate": 9.188584034891977e-06, |
| "loss": 0.8003798675537109, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.5851680698306465, |
| "grad_norm": 10.1875, |
| "learning_rate": 9.187316157825338e-06, |
| "loss": 0.768984146118164, |
| "step": 33050 |
| }, |
| { |
| "epoch": 0.5860533467895431, |
| "grad_norm": 9.75, |
| "learning_rate": 9.186048280758699e-06, |
| "loss": 0.8237137603759765, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.5869386237484397, |
| "grad_norm": 7.75, |
| "learning_rate": 9.184780403692058e-06, |
| "loss": 0.7769164276123047, |
| "step": 33150 |
| }, |
| { |
| "epoch": 0.5878239007073363, |
| "grad_norm": 9.375, |
| "learning_rate": 9.183512526625419e-06, |
| "loss": 0.8206394958496094, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.5887091776662329, |
| "grad_norm": 8.125, |
| "learning_rate": 9.18224464955878e-06, |
| "loss": 0.811323013305664, |
| "step": 33250 |
| }, |
| { |
| "epoch": 0.5895944546251295, |
| "grad_norm": 7.3125, |
| "learning_rate": 9.180976772492139e-06, |
| "loss": 0.8037760925292968, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.5904797315840261, |
| "grad_norm": 9.4375, |
| "learning_rate": 9.1797088954255e-06, |
| "loss": 0.8595541381835937, |
| "step": 33350 |
| }, |
| { |
| "epoch": 0.5913650085429226, |
| "grad_norm": 9.5625, |
| "learning_rate": 9.17844101835886e-06, |
| "loss": 0.7697556304931641, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.5922502855018192, |
| "grad_norm": 9.25, |
| "learning_rate": 9.177173141292221e-06, |
| "loss": 0.7896424865722657, |
| "step": 33450 |
| }, |
| { |
| "epoch": 0.5931355624607159, |
| "grad_norm": 8.3125, |
| "learning_rate": 9.175905264225582e-06, |
| "loss": 0.8332701873779297, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.5940208394196125, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.174637387158943e-06, |
| "loss": 0.7919515228271484, |
| "step": 33550 |
| }, |
| { |
| "epoch": 0.594906116378509, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.173369510092302e-06, |
| "loss": 0.800345687866211, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.5957913933374056, |
| "grad_norm": 9.3125, |
| "learning_rate": 9.172101633025663e-06, |
| "loss": 0.7806795501708984, |
| "step": 33650 |
| }, |
| { |
| "epoch": 0.5966766702963022, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.170833755959023e-06, |
| "loss": 0.8437120056152344, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.5975619472551988, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.169565878892384e-06, |
| "loss": 0.854691162109375, |
| "step": 33750 |
| }, |
| { |
| "epoch": 0.5984472242140954, |
| "grad_norm": 9.125, |
| "learning_rate": 9.168298001825743e-06, |
| "loss": 0.7852024078369141, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.599332501172992, |
| "grad_norm": 10.25, |
| "learning_rate": 9.167030124759104e-06, |
| "loss": 0.7816255187988281, |
| "step": 33850 |
| }, |
| { |
| "epoch": 0.6002177781318886, |
| "grad_norm": 9.375, |
| "learning_rate": 9.165762247692465e-06, |
| "loss": 0.7625868988037109, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.6011030550907851, |
| "grad_norm": 7.71875, |
| "learning_rate": 9.164494370625824e-06, |
| "loss": 0.8304756164550782, |
| "step": 33950 |
| }, |
| { |
| "epoch": 0.6019883320496817, |
| "grad_norm": 9.6875, |
| "learning_rate": 9.163226493559185e-06, |
| "loss": 0.8507473754882813, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.6028736090085783, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.161958616492546e-06, |
| "loss": 0.8916117858886718, |
| "step": 34050 |
| }, |
| { |
| "epoch": 0.603758885967475, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.160690739425907e-06, |
| "loss": 0.7812516021728516, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.6046441629263715, |
| "grad_norm": 9.9375, |
| "learning_rate": 9.159422862359266e-06, |
| "loss": 0.85893310546875, |
| "step": 34150 |
| }, |
| { |
| "epoch": 0.6055294398852681, |
| "grad_norm": 8.75, |
| "learning_rate": 9.158154985292627e-06, |
| "loss": 0.8037278747558594, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.6064147168441647, |
| "grad_norm": 9.5, |
| "learning_rate": 9.156887108225987e-06, |
| "loss": 0.8571352386474609, |
| "step": 34250 |
| }, |
| { |
| "epoch": 0.6072999938030613, |
| "grad_norm": 9.25, |
| "learning_rate": 9.155619231159346e-06, |
| "loss": 0.8393865203857422, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.6081852707619578, |
| "grad_norm": 8.8125, |
| "learning_rate": 9.154351354092707e-06, |
| "loss": 0.7959491729736328, |
| "step": 34350 |
| }, |
| { |
| "epoch": 0.6090705477208544, |
| "grad_norm": 8.375, |
| "learning_rate": 9.153083477026068e-06, |
| "loss": 0.8323846435546876, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.6099558246797511, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.151815599959429e-06, |
| "loss": 0.7896021270751953, |
| "step": 34450 |
| }, |
| { |
| "epoch": 0.6108411016386477, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.150547722892788e-06, |
| "loss": 0.8387126922607422, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.6117263785975442, |
| "grad_norm": 8.75, |
| "learning_rate": 9.149279845826149e-06, |
| "loss": 0.8292191314697266, |
| "step": 34550 |
| }, |
| { |
| "epoch": 0.6126116555564408, |
| "grad_norm": 6.75, |
| "learning_rate": 9.14801196875951e-06, |
| "loss": 0.8881966400146485, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.6134969325153374, |
| "grad_norm": 8.875, |
| "learning_rate": 9.14674409169287e-06, |
| "loss": 0.7790008544921875, |
| "step": 34650 |
| }, |
| { |
| "epoch": 0.614382209474234, |
| "grad_norm": 9.875, |
| "learning_rate": 9.145476214626231e-06, |
| "loss": 0.772136459350586, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.6152674864331306, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.144208337559592e-06, |
| "loss": 0.8583509063720703, |
| "step": 34750 |
| }, |
| { |
| "epoch": 0.6161527633920272, |
| "grad_norm": 9.5, |
| "learning_rate": 9.142940460492951e-06, |
| "loss": 0.7817726898193359, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.6170380403509238, |
| "grad_norm": 7.96875, |
| "learning_rate": 9.141672583426312e-06, |
| "loss": 0.7983211517333985, |
| "step": 34850 |
| }, |
| { |
| "epoch": 0.6179233173098204, |
| "grad_norm": 7.75, |
| "learning_rate": 9.140404706359673e-06, |
| "loss": 0.8518927001953125, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.6188085942687169, |
| "grad_norm": 7.15625, |
| "learning_rate": 9.139136829293032e-06, |
| "loss": 0.7762453460693359, |
| "step": 34950 |
| }, |
| { |
| "epoch": 0.6196938712276135, |
| "grad_norm": 8.6875, |
| "learning_rate": 9.137868952226393e-06, |
| "loss": 0.7606196594238281, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.6196938712276135, |
| "eval_cer": 14.630998411088756, |
| "eval_loss": 0.34649306535720825, |
| "eval_runtime": 381.1753, |
| "eval_samples_per_second": 13.117, |
| "eval_steps_per_second": 1.64, |
| "eval_wer": 30.37997340697555, |
| "step": 35000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 395360, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 7, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.8051981033472e+21, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|