| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.05060984867655246, | |
| "eval_steps": 200, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 5.060984867655246e-05, | |
| "grad_norm": 5.005204677581787, | |
| "learning_rate": 4.999915650252206e-05, | |
| "loss": 0.722, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00010121969735310492, | |
| "grad_norm": 4.560483932495117, | |
| "learning_rate": 4.9998313005044116e-05, | |
| "loss": 0.3625, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00015182954602965736, | |
| "grad_norm": 9.806407928466797, | |
| "learning_rate": 4.9997469507566176e-05, | |
| "loss": 0.786, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.00020243939470620983, | |
| "grad_norm": 11.181573867797852, | |
| "learning_rate": 4.999662601008823e-05, | |
| "loss": 0.6063, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0002530492433827623, | |
| "grad_norm": 3.7916452884674072, | |
| "learning_rate": 4.999578251261029e-05, | |
| "loss": 0.4681, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0003036590920593147, | |
| "grad_norm": 10.196318626403809, | |
| "learning_rate": 4.999493901513235e-05, | |
| "loss": 0.8192, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0003542689407358672, | |
| "grad_norm": 8.210321426391602, | |
| "learning_rate": 4.9994095517654403e-05, | |
| "loss": 0.7953, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.00040487878941241966, | |
| "grad_norm": 8.648380279541016, | |
| "learning_rate": 4.9993252020176464e-05, | |
| "loss": 0.5404, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0004554886380889721, | |
| "grad_norm": null, | |
| "learning_rate": 4.999283027143749e-05, | |
| "loss": 0.7557, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0005060984867655246, | |
| "grad_norm": 3.2002220153808594, | |
| "learning_rate": 4.999198677395955e-05, | |
| "loss": 0.5948, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.000556708335442077, | |
| "grad_norm": 6.236138343811035, | |
| "learning_rate": 4.9991143276481604e-05, | |
| "loss": 1.1657, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0006073181841186294, | |
| "grad_norm": 8.113347053527832, | |
| "learning_rate": 4.9990299779003664e-05, | |
| "loss": 0.6126, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0006579280327951819, | |
| "grad_norm": 6.6634979248046875, | |
| "learning_rate": 4.998945628152572e-05, | |
| "loss": 0.5794, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0007085378814717344, | |
| "grad_norm": 7.940774440765381, | |
| "learning_rate": 4.998861278404778e-05, | |
| "loss": 0.3316, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0007591477301482868, | |
| "grad_norm": 7.296051025390625, | |
| "learning_rate": 4.998776928656984e-05, | |
| "loss": 0.8218, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0008097575788248393, | |
| "grad_norm": 6.318060398101807, | |
| "learning_rate": 4.998692578909189e-05, | |
| "loss": 0.7266, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0008603674275013917, | |
| "grad_norm": 13.293371200561523, | |
| "learning_rate": 4.998608229161395e-05, | |
| "loss": 0.96, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0009109772761779442, | |
| "grad_norm": null, | |
| "learning_rate": 4.998566054287498e-05, | |
| "loss": 0.9009, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0009615871248544967, | |
| "grad_norm": 4.929239749908447, | |
| "learning_rate": 4.998481704539704e-05, | |
| "loss": 0.7153, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.001012196973531049, | |
| "grad_norm": 8.825164794921875, | |
| "learning_rate": 4.998397354791909e-05, | |
| "loss": 0.4486, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0010628068222076016, | |
| "grad_norm": 8.393811225891113, | |
| "learning_rate": 4.998313005044115e-05, | |
| "loss": 0.7293, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.001113416670884154, | |
| "grad_norm": 4.109868049621582, | |
| "learning_rate": 4.9982286552963206e-05, | |
| "loss": 0.5638, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0011640265195607066, | |
| "grad_norm": 13.631953239440918, | |
| "learning_rate": 4.9981443055485266e-05, | |
| "loss": 1.0084, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0012146363682372589, | |
| "grad_norm": 9.624829292297363, | |
| "learning_rate": 4.9980599558007326e-05, | |
| "loss": 0.9545, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0012652462169138114, | |
| "grad_norm": 5.674628257751465, | |
| "learning_rate": 4.997975606052938e-05, | |
| "loss": 0.5188, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0013158560655903639, | |
| "grad_norm": 6.339033603668213, | |
| "learning_rate": 4.997891256305144e-05, | |
| "loss": 0.5308, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0013664659142669164, | |
| "grad_norm": 3.895756244659424, | |
| "learning_rate": 4.997806906557349e-05, | |
| "loss": 0.6042, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0014170757629434689, | |
| "grad_norm": 4.451659679412842, | |
| "learning_rate": 4.997722556809555e-05, | |
| "loss": 0.4957, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0014676856116200212, | |
| "grad_norm": 5.246840476989746, | |
| "learning_rate": 4.9976382070617613e-05, | |
| "loss": 0.7031, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0015182954602965737, | |
| "grad_norm": 7.467830657958984, | |
| "learning_rate": 4.997553857313967e-05, | |
| "loss": 0.8567, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0015689053089731262, | |
| "grad_norm": 6.088858127593994, | |
| "learning_rate": 4.997469507566173e-05, | |
| "loss": 0.5998, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0016195151576496787, | |
| "grad_norm": 10.719757080078125, | |
| "learning_rate": 4.997385157818378e-05, | |
| "loss": 0.8178, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0016701250063262312, | |
| "grad_norm": 5.923684120178223, | |
| "learning_rate": 4.997300808070584e-05, | |
| "loss": 0.9103, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0017207348550027834, | |
| "grad_norm": 8.114771842956543, | |
| "learning_rate": 4.99721645832279e-05, | |
| "loss": 0.4154, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.001771344703679336, | |
| "grad_norm": 9.67381477355957, | |
| "learning_rate": 4.9971321085749954e-05, | |
| "loss": 0.6067, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0018219545523558884, | |
| "grad_norm": 7.255108833312988, | |
| "learning_rate": 4.9970477588272014e-05, | |
| "loss": 0.4104, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.001872564401032441, | |
| "grad_norm": 6.330812454223633, | |
| "learning_rate": 4.996963409079407e-05, | |
| "loss": 0.7361, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0019231742497089934, | |
| "grad_norm": 10.727275848388672, | |
| "learning_rate": 4.996879059331613e-05, | |
| "loss": 0.8339, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.001973784098385546, | |
| "grad_norm": 5.6855950355529785, | |
| "learning_rate": 4.996794709583819e-05, | |
| "loss": 0.6211, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.002024393947062098, | |
| "grad_norm": 9.52706527709961, | |
| "learning_rate": 4.996710359836024e-05, | |
| "loss": 0.6736, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.002075003795738651, | |
| "grad_norm": 9.556534767150879, | |
| "learning_rate": 4.99662601008823e-05, | |
| "loss": 0.5151, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.002125613644415203, | |
| "grad_norm": 4.778113842010498, | |
| "learning_rate": 4.9965416603404355e-05, | |
| "loss": 0.5348, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0021762234930917555, | |
| "grad_norm": 10.250826835632324, | |
| "learning_rate": 4.9964573105926416e-05, | |
| "loss": 0.5674, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.002226833341768308, | |
| "grad_norm": 6.03010892868042, | |
| "learning_rate": 4.9963729608448476e-05, | |
| "loss": 0.4641, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0022774431904448605, | |
| "grad_norm": 10.152463912963867, | |
| "learning_rate": 4.996288611097053e-05, | |
| "loss": 1.0214, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.002328053039121413, | |
| "grad_norm": 8.930192947387695, | |
| "learning_rate": 4.996204261349259e-05, | |
| "loss": 0.7772, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.0023786628877979655, | |
| "grad_norm": 10.251153945922852, | |
| "learning_rate": 4.996119911601464e-05, | |
| "loss": 0.9428, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0024292727364745178, | |
| "grad_norm": 6.223593711853027, | |
| "learning_rate": 4.99603556185367e-05, | |
| "loss": 0.6318, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0024798825851510705, | |
| "grad_norm": 2.4227373600006104, | |
| "learning_rate": 4.995951212105876e-05, | |
| "loss": 0.436, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0025304924338276228, | |
| "grad_norm": 6.225770473480225, | |
| "learning_rate": 4.9958668623580817e-05, | |
| "loss": 0.7753, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0025811022825041755, | |
| "grad_norm": 5.370884895324707, | |
| "learning_rate": 4.995782512610288e-05, | |
| "loss": 0.5137, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.0026317121311807278, | |
| "grad_norm": 5.137220859527588, | |
| "learning_rate": 4.995698162862493e-05, | |
| "loss": 0.5953, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.00268232197985728, | |
| "grad_norm": 5.909286975860596, | |
| "learning_rate": 4.995613813114699e-05, | |
| "loss": 0.8687, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.0027329318285338328, | |
| "grad_norm": 6.003445148468018, | |
| "learning_rate": 4.995529463366905e-05, | |
| "loss": 0.53, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.002783541677210385, | |
| "grad_norm": 3.8742566108703613, | |
| "learning_rate": 4.9954451136191104e-05, | |
| "loss": 0.5512, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0028341515258869378, | |
| "grad_norm": 3.5072569847106934, | |
| "learning_rate": 4.9953607638713164e-05, | |
| "loss": 0.3383, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.00288476137456349, | |
| "grad_norm": 5.679248809814453, | |
| "learning_rate": 4.995276414123522e-05, | |
| "loss": 0.7801, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.0029353712232400423, | |
| "grad_norm": 5.291722297668457, | |
| "learning_rate": 4.995192064375728e-05, | |
| "loss": 0.6538, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.002985981071916595, | |
| "grad_norm": 4.748198986053467, | |
| "learning_rate": 4.995107714627934e-05, | |
| "loss": 0.398, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.0030365909205931473, | |
| "grad_norm": 4.336450576782227, | |
| "learning_rate": 4.995023364880139e-05, | |
| "loss": 0.3351, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0030872007692697, | |
| "grad_norm": 5.251417636871338, | |
| "learning_rate": 4.994939015132345e-05, | |
| "loss": 0.4014, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.0031378106179462523, | |
| "grad_norm": 7.911617755889893, | |
| "learning_rate": 4.9948546653845505e-05, | |
| "loss": 0.3151, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.0031884204666228046, | |
| "grad_norm": 6.031822681427002, | |
| "learning_rate": 4.9947703156367565e-05, | |
| "loss": 0.9222, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.0032390303152993573, | |
| "grad_norm": 10.08216381072998, | |
| "learning_rate": 4.994685965888962e-05, | |
| "loss": 0.6676, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.0032896401639759096, | |
| "grad_norm": 3.475123167037964, | |
| "learning_rate": 4.994601616141168e-05, | |
| "loss": 0.2461, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0033402500126524623, | |
| "grad_norm": 4.408476829528809, | |
| "learning_rate": 4.994517266393374e-05, | |
| "loss": 0.26, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.0033908598613290146, | |
| "grad_norm": 6.929357528686523, | |
| "learning_rate": 4.994432916645579e-05, | |
| "loss": 0.9818, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.003441469710005567, | |
| "grad_norm": 6.456033229827881, | |
| "learning_rate": 4.994348566897785e-05, | |
| "loss": 0.4414, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.0034920795586821196, | |
| "grad_norm": 26.87461280822754, | |
| "learning_rate": 4.9942642171499906e-05, | |
| "loss": 0.7951, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.003542689407358672, | |
| "grad_norm": 7.420943737030029, | |
| "learning_rate": 4.9941798674021966e-05, | |
| "loss": 0.5827, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0035932992560352246, | |
| "grad_norm": 5.9366583824157715, | |
| "learning_rate": 4.9940955176544027e-05, | |
| "loss": 0.6941, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.003643909104711777, | |
| "grad_norm": 5.8037543296813965, | |
| "learning_rate": 4.994011167906608e-05, | |
| "loss": 0.6301, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.003694518953388329, | |
| "grad_norm": 4.243365287780762, | |
| "learning_rate": 4.993926818158814e-05, | |
| "loss": 0.6995, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.003745128802064882, | |
| "grad_norm": 7.5515522956848145, | |
| "learning_rate": 4.9938424684110194e-05, | |
| "loss": 0.5537, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.003795738650741434, | |
| "grad_norm": 8.966812133789062, | |
| "learning_rate": 4.9937581186632254e-05, | |
| "loss": 0.5416, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.003846348499417987, | |
| "grad_norm": 7.70654296875, | |
| "learning_rate": 4.9936737689154314e-05, | |
| "loss": 0.6907, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.003896958348094539, | |
| "grad_norm": 6.371860504150391, | |
| "learning_rate": 4.993589419167637e-05, | |
| "loss": 0.468, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.003947568196771092, | |
| "grad_norm": 6.1000542640686035, | |
| "learning_rate": 4.993505069419843e-05, | |
| "loss": 0.6523, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.003998178045447644, | |
| "grad_norm": 5.390421390533447, | |
| "learning_rate": 4.993420719672048e-05, | |
| "loss": 1.0799, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.004048787894124196, | |
| "grad_norm": 7.502729415893555, | |
| "learning_rate": 4.993336369924254e-05, | |
| "loss": 0.7921, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.004099397742800749, | |
| "grad_norm": 3.19514536857605, | |
| "learning_rate": 4.99325202017646e-05, | |
| "loss": 0.2293, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.004150007591477302, | |
| "grad_norm": 7.2197585105896, | |
| "learning_rate": 4.9931676704286655e-05, | |
| "loss": 0.6778, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.004200617440153854, | |
| "grad_norm": 4.662430286407471, | |
| "learning_rate": 4.9930833206808715e-05, | |
| "loss": 0.4956, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.004251227288830406, | |
| "grad_norm": 5.545823574066162, | |
| "learning_rate": 4.992998970933077e-05, | |
| "loss": 0.4587, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.004301837137506959, | |
| "grad_norm": 8.578537940979004, | |
| "learning_rate": 4.992914621185283e-05, | |
| "loss": 0.2751, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.004352446986183511, | |
| "grad_norm": 4.3413472175598145, | |
| "learning_rate": 4.992830271437489e-05, | |
| "loss": 0.3712, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.004403056834860064, | |
| "grad_norm": 3.3616743087768555, | |
| "learning_rate": 4.992745921689694e-05, | |
| "loss": 0.4654, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.004453666683536616, | |
| "grad_norm": 3.7586441040039062, | |
| "learning_rate": 4.9926615719419e-05, | |
| "loss": 0.5338, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.004504276532213168, | |
| "grad_norm": 4.674256801605225, | |
| "learning_rate": 4.9925772221941056e-05, | |
| "loss": 0.3885, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.004554886380889721, | |
| "grad_norm": 6.164566993713379, | |
| "learning_rate": 4.9924928724463116e-05, | |
| "loss": 0.3949, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.004605496229566274, | |
| "grad_norm": 12.5003023147583, | |
| "learning_rate": 4.9924085226985176e-05, | |
| "loss": 0.6829, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.004656106078242826, | |
| "grad_norm": 8.045877456665039, | |
| "learning_rate": 4.992324172950723e-05, | |
| "loss": 0.7711, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.004706715926919378, | |
| "grad_norm": 5.4890570640563965, | |
| "learning_rate": 4.992239823202929e-05, | |
| "loss": 0.6287, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.004757325775595931, | |
| "grad_norm": 3.246534585952759, | |
| "learning_rate": 4.992155473455134e-05, | |
| "loss": 0.3895, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.004807935624272484, | |
| "grad_norm": 7.648909568786621, | |
| "learning_rate": 4.9920711237073403e-05, | |
| "loss": 0.7327, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0048585454729490355, | |
| "grad_norm": 5.417934894561768, | |
| "learning_rate": 4.9919867739595464e-05, | |
| "loss": 0.5529, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.004909155321625588, | |
| "grad_norm": 18.588844299316406, | |
| "learning_rate": 4.991902424211752e-05, | |
| "loss": 1.1181, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.004959765170302141, | |
| "grad_norm": 11.765092849731445, | |
| "learning_rate": 4.991818074463958e-05, | |
| "loss": 0.7821, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.005010375018978693, | |
| "grad_norm": 6.446319580078125, | |
| "learning_rate": 4.991733724716163e-05, | |
| "loss": 0.7071, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.0050609848676552455, | |
| "grad_norm": 7.347968101501465, | |
| "learning_rate": 4.991649374968369e-05, | |
| "loss": 0.4164, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0050609848676552455, | |
| "eval_cer": 0.23587196123692475, | |
| "eval_loss": 0.37637796998023987, | |
| "eval_runtime": 2775.7084, | |
| "eval_samples_per_second": 5.647, | |
| "eval_steps_per_second": 0.353, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.005111594716331798, | |
| "grad_norm": 10.311556816101074, | |
| "learning_rate": 4.991565025220575e-05, | |
| "loss": 1.1246, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.005162204565008351, | |
| "grad_norm": 10.519822120666504, | |
| "learning_rate": 4.9914806754727805e-05, | |
| "loss": 0.6127, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.005212814413684903, | |
| "grad_norm": 6.434566974639893, | |
| "learning_rate": 4.9913963257249865e-05, | |
| "loss": 0.5269, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.0052634242623614555, | |
| "grad_norm": 8.08187198638916, | |
| "learning_rate": 4.991311975977192e-05, | |
| "loss": 1.0978, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.005314034111038008, | |
| "grad_norm": 7.574239730834961, | |
| "learning_rate": 4.991227626229398e-05, | |
| "loss": 0.7327, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.00536464395971456, | |
| "grad_norm": 7.921819686889648, | |
| "learning_rate": 4.991143276481603e-05, | |
| "loss": 1.194, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.005415253808391113, | |
| "grad_norm": 9.017950057983398, | |
| "learning_rate": 4.991058926733809e-05, | |
| "loss": 0.9906, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.0054658636570676655, | |
| "grad_norm": 6.131129741668701, | |
| "learning_rate": 4.990974576986015e-05, | |
| "loss": 0.8681, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.005516473505744218, | |
| "grad_norm": 6.411858081817627, | |
| "learning_rate": 4.9908902272382206e-05, | |
| "loss": 0.6271, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.00556708335442077, | |
| "grad_norm": 6.6365227699279785, | |
| "learning_rate": 4.9908058774904266e-05, | |
| "loss": 0.9051, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.005617693203097323, | |
| "grad_norm": 7.575653553009033, | |
| "learning_rate": 4.990721527742632e-05, | |
| "loss": 0.9199, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.0056683030517738755, | |
| "grad_norm": 8.839277267456055, | |
| "learning_rate": 4.990637177994838e-05, | |
| "loss": 0.909, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.005718912900450427, | |
| "grad_norm": 8.077840805053711, | |
| "learning_rate": 4.990552828247044e-05, | |
| "loss": 0.6063, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.00576952274912698, | |
| "grad_norm": 8.73996639251709, | |
| "learning_rate": 4.990468478499249e-05, | |
| "loss": 0.9447, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.005820132597803533, | |
| "grad_norm": 6.638965129852295, | |
| "learning_rate": 4.990384128751455e-05, | |
| "loss": 0.6174, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.005870742446480085, | |
| "grad_norm": 5.351879119873047, | |
| "learning_rate": 4.990299779003661e-05, | |
| "loss": 0.4632, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.005921352295156637, | |
| "grad_norm": 6.497000694274902, | |
| "learning_rate": 4.990215429255867e-05, | |
| "loss": 0.5266, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.00597196214383319, | |
| "grad_norm": 10.24178695678711, | |
| "learning_rate": 4.990131079508073e-05, | |
| "loss": 0.8037, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.006022571992509743, | |
| "grad_norm": 5.579001426696777, | |
| "learning_rate": 4.990046729760278e-05, | |
| "loss": 0.6132, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.006073181841186295, | |
| "grad_norm": 3.613511562347412, | |
| "learning_rate": 4.989962380012484e-05, | |
| "loss": 0.3918, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.006123791689862847, | |
| "grad_norm": 9.699933052062988, | |
| "learning_rate": 4.9898780302646894e-05, | |
| "loss": 0.8956, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.0061744015385394, | |
| "grad_norm": 10.87297534942627, | |
| "learning_rate": 4.9897936805168954e-05, | |
| "loss": 1.1579, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.006225011387215952, | |
| "grad_norm": 6.29569673538208, | |
| "learning_rate": 4.9897093307691014e-05, | |
| "loss": 0.4237, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.006275621235892505, | |
| "grad_norm": 9.018596649169922, | |
| "learning_rate": 4.989624981021307e-05, | |
| "loss": 0.7854, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.006326231084569057, | |
| "grad_norm": 9.707599639892578, | |
| "learning_rate": 4.989540631273513e-05, | |
| "loss": 1.3102, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.006376840933245609, | |
| "grad_norm": 5.1023430824279785, | |
| "learning_rate": 4.989456281525718e-05, | |
| "loss": 0.5906, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.006427450781922162, | |
| "grad_norm": 2.3029062747955322, | |
| "learning_rate": 4.989371931777924e-05, | |
| "loss": 0.7183, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.006478060630598715, | |
| "grad_norm": 11.459485054016113, | |
| "learning_rate": 4.98928758203013e-05, | |
| "loss": 0.5505, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.006528670479275267, | |
| "grad_norm": 7.620695114135742, | |
| "learning_rate": 4.9892032322823355e-05, | |
| "loss": 0.9374, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.006579280327951819, | |
| "grad_norm": 5.305224895477295, | |
| "learning_rate": 4.9891188825345416e-05, | |
| "loss": 0.7362, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.006629890176628372, | |
| "grad_norm": 7.901217460632324, | |
| "learning_rate": 4.989034532786747e-05, | |
| "loss": 0.9518, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.006680500025304925, | |
| "grad_norm": 7.463654041290283, | |
| "learning_rate": 4.988950183038953e-05, | |
| "loss": 0.9789, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.0067311098739814765, | |
| "grad_norm": 4.86021089553833, | |
| "learning_rate": 4.988865833291159e-05, | |
| "loss": 0.8441, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.006781719722658029, | |
| "grad_norm": 5.242839813232422, | |
| "learning_rate": 4.988781483543364e-05, | |
| "loss": 0.8258, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.006832329571334582, | |
| "grad_norm": 3.524228572845459, | |
| "learning_rate": 4.98869713379557e-05, | |
| "loss": 0.3924, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.006882939420011134, | |
| "grad_norm": 3.8527095317840576, | |
| "learning_rate": 4.9886127840477756e-05, | |
| "loss": 0.5253, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.0069335492686876865, | |
| "grad_norm": 3.4836299419403076, | |
| "learning_rate": 4.9885284342999817e-05, | |
| "loss": 0.5859, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.006984159117364239, | |
| "grad_norm": 9.650673866271973, | |
| "learning_rate": 4.988444084552188e-05, | |
| "loss": 0.7122, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.007034768966040792, | |
| "grad_norm": 5.5393829345703125, | |
| "learning_rate": 4.988359734804393e-05, | |
| "loss": 0.5434, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.007085378814717344, | |
| "grad_norm": 11.345093727111816, | |
| "learning_rate": 4.988275385056599e-05, | |
| "loss": 0.9166, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0071359886633938965, | |
| "grad_norm": 4.247881889343262, | |
| "learning_rate": 4.9881910353088044e-05, | |
| "loss": 0.6162, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.007186598512070449, | |
| "grad_norm": 5.472718238830566, | |
| "learning_rate": 4.9881066855610104e-05, | |
| "loss": 0.8378, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.007237208360747001, | |
| "grad_norm": 6.520363807678223, | |
| "learning_rate": 4.9880223358132164e-05, | |
| "loss": 0.6258, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.007287818209423554, | |
| "grad_norm": 4.629631996154785, | |
| "learning_rate": 4.987937986065422e-05, | |
| "loss": 0.5166, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.0073384280581001065, | |
| "grad_norm": 9.533284187316895, | |
| "learning_rate": 4.987853636317628e-05, | |
| "loss": 1.1145, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.007389037906776658, | |
| "grad_norm": 7.794739246368408, | |
| "learning_rate": 4.987769286569833e-05, | |
| "loss": 1.0134, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.007439647755453211, | |
| "grad_norm": 8.504875183105469, | |
| "learning_rate": 4.987684936822039e-05, | |
| "loss": 0.8261, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.007490257604129764, | |
| "grad_norm": 6.671535015106201, | |
| "learning_rate": 4.9876005870742445e-05, | |
| "loss": 0.7141, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.0075408674528063165, | |
| "grad_norm": 11.959641456604004, | |
| "learning_rate": 4.9875162373264505e-05, | |
| "loss": 0.8247, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.007591477301482868, | |
| "grad_norm": 4.4155378341674805, | |
| "learning_rate": 4.9874318875786565e-05, | |
| "loss": 0.348, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.007642087150159421, | |
| "grad_norm": 5.849681377410889, | |
| "learning_rate": 4.987347537830862e-05, | |
| "loss": 0.8355, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.007692696998835974, | |
| "grad_norm": 10.244682312011719, | |
| "learning_rate": 4.987263188083068e-05, | |
| "loss": 0.7634, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.007743306847512526, | |
| "grad_norm": 3.5256447792053223, | |
| "learning_rate": 4.987178838335273e-05, | |
| "loss": 0.451, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.007793916696189078, | |
| "grad_norm": 7.318663120269775, | |
| "learning_rate": 4.987094488587479e-05, | |
| "loss": 0.7009, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.007844526544865631, | |
| "grad_norm": 5.343123912811279, | |
| "learning_rate": 4.987010138839685e-05, | |
| "loss": 0.4291, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.007895136393542184, | |
| "grad_norm": 6.702011585235596, | |
| "learning_rate": 4.9869257890918906e-05, | |
| "loss": 0.9008, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.007945746242218736, | |
| "grad_norm": 9.040878295898438, | |
| "learning_rate": 4.9868414393440966e-05, | |
| "loss": 1.0295, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.007996356090895287, | |
| "grad_norm": 5.2994585037231445, | |
| "learning_rate": 4.986757089596302e-05, | |
| "loss": 0.6427, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.00804696593957184, | |
| "grad_norm": 8.381704330444336, | |
| "learning_rate": 4.986672739848508e-05, | |
| "loss": 0.8675, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.008097575788248393, | |
| "grad_norm": 9.162700653076172, | |
| "learning_rate": 4.986588390100714e-05, | |
| "loss": 1.1891, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.008148185636924946, | |
| "grad_norm": 10.618518829345703, | |
| "learning_rate": 4.9865040403529194e-05, | |
| "loss": 0.7131, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.008198795485601498, | |
| "grad_norm": 7.559556484222412, | |
| "learning_rate": 4.9864196906051254e-05, | |
| "loss": 0.7327, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.008249405334278051, | |
| "grad_norm": 7.372714519500732, | |
| "learning_rate": 4.986335340857331e-05, | |
| "loss": 0.5496, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.008300015182954604, | |
| "grad_norm": 5.887473106384277, | |
| "learning_rate": 4.986250991109537e-05, | |
| "loss": 0.4605, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.008350625031631155, | |
| "grad_norm": 3.8664021492004395, | |
| "learning_rate": 4.986166641361743e-05, | |
| "loss": 0.8836, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.008401234880307707, | |
| "grad_norm": 6.713327407836914, | |
| "learning_rate": 4.986082291613948e-05, | |
| "loss": 0.7846, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.00845184472898426, | |
| "grad_norm": 6.036464691162109, | |
| "learning_rate": 4.985997941866154e-05, | |
| "loss": 0.8055, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.008502454577660813, | |
| "grad_norm": 4.087986946105957, | |
| "learning_rate": 4.9859135921183595e-05, | |
| "loss": 0.7829, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.008553064426337366, | |
| "grad_norm": 9.335679054260254, | |
| "learning_rate": 4.9858292423705655e-05, | |
| "loss": 1.279, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.008603674275013918, | |
| "grad_norm": 6.914140701293945, | |
| "learning_rate": 4.9857448926227715e-05, | |
| "loss": 0.8773, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.008654284123690471, | |
| "grad_norm": 5.106595993041992, | |
| "learning_rate": 4.985660542874977e-05, | |
| "loss": 0.7357, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.008704893972367022, | |
| "grad_norm": 5.217001914978027, | |
| "learning_rate": 4.985576193127183e-05, | |
| "loss": 1.0202, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.008755503821043575, | |
| "grad_norm": 7.956677436828613, | |
| "learning_rate": 4.985491843379388e-05, | |
| "loss": 0.8517, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.008806113669720127, | |
| "grad_norm": 6.432283878326416, | |
| "learning_rate": 4.985407493631594e-05, | |
| "loss": 0.7963, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.00885672351839668, | |
| "grad_norm": 25.808292388916016, | |
| "learning_rate": 4.9853231438838e-05, | |
| "loss": 0.5176, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.008907333367073233, | |
| "grad_norm": 6.786406517028809, | |
| "learning_rate": 4.9852387941360056e-05, | |
| "loss": 0.9981, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.008957943215749786, | |
| "grad_norm": 13.001777648925781, | |
| "learning_rate": 4.9851544443882116e-05, | |
| "loss": 0.6546, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.009008553064426337, | |
| "grad_norm": 5.085973262786865, | |
| "learning_rate": 4.985070094640417e-05, | |
| "loss": 0.4445, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.00905916291310289, | |
| "grad_norm": 10.445619583129883, | |
| "learning_rate": 4.984985744892623e-05, | |
| "loss": 0.519, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.009109772761779442, | |
| "grad_norm": 7.5755510330200195, | |
| "learning_rate": 4.984901395144829e-05, | |
| "loss": 0.9253, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.009160382610455995, | |
| "grad_norm": 12.85741901397705, | |
| "learning_rate": 4.984817045397034e-05, | |
| "loss": 0.7553, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.009210992459132547, | |
| "grad_norm": 4.291009426116943, | |
| "learning_rate": 4.9847326956492404e-05, | |
| "loss": 0.7912, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.0092616023078091, | |
| "grad_norm": 9.602625846862793, | |
| "learning_rate": 4.984648345901446e-05, | |
| "loss": 0.7954, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.009312212156485653, | |
| "grad_norm": 10.099987030029297, | |
| "learning_rate": 4.984563996153652e-05, | |
| "loss": 0.6241, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.009362822005162204, | |
| "grad_norm": 9.353877067565918, | |
| "learning_rate": 4.984479646405858e-05, | |
| "loss": 0.7845, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.009413431853838757, | |
| "grad_norm": 5.5436506271362305, | |
| "learning_rate": 4.984395296658063e-05, | |
| "loss": 0.6337, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.00946404170251531, | |
| "grad_norm": 6.538369655609131, | |
| "learning_rate": 4.984310946910269e-05, | |
| "loss": 0.4818, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.009514651551191862, | |
| "grad_norm": 11.956756591796875, | |
| "learning_rate": 4.9842265971624744e-05, | |
| "loss": 0.9237, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.009565261399868415, | |
| "grad_norm": 11.44477367401123, | |
| "learning_rate": 4.9841422474146805e-05, | |
| "loss": 1.0572, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.009615871248544967, | |
| "grad_norm": 9.988565444946289, | |
| "learning_rate": 4.984057897666886e-05, | |
| "loss": 0.6858, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.00966648109722152, | |
| "grad_norm": 4.8977813720703125, | |
| "learning_rate": 4.983973547919092e-05, | |
| "loss": 0.5965, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.009717090945898071, | |
| "grad_norm": 6.308709144592285, | |
| "learning_rate": 4.983889198171298e-05, | |
| "loss": 0.4817, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.009767700794574624, | |
| "grad_norm": 5.661224842071533, | |
| "learning_rate": 4.983804848423503e-05, | |
| "loss": 0.5738, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.009818310643251177, | |
| "grad_norm": 5.926614284515381, | |
| "learning_rate": 4.983720498675709e-05, | |
| "loss": 0.5391, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.00986892049192773, | |
| "grad_norm": 4.607276916503906, | |
| "learning_rate": 4.9836361489279145e-05, | |
| "loss": 0.4223, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.009919530340604282, | |
| "grad_norm": 14.739141464233398, | |
| "learning_rate": 4.9835517991801206e-05, | |
| "loss": 1.0047, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.009970140189280835, | |
| "grad_norm": 4.335879325866699, | |
| "learning_rate": 4.9834674494323266e-05, | |
| "loss": 0.7172, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.010020750037957386, | |
| "grad_norm": 8.950626373291016, | |
| "learning_rate": 4.983383099684532e-05, | |
| "loss": 0.8787, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.010071359886633938, | |
| "grad_norm": 5.630717754364014, | |
| "learning_rate": 4.983298749936738e-05, | |
| "loss": 0.5813, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.010121969735310491, | |
| "grad_norm": 9.189420700073242, | |
| "learning_rate": 4.983214400188943e-05, | |
| "loss": 0.9012, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.010121969735310491, | |
| "eval_cer": 0.22362332855033584, | |
| "eval_loss": 0.3546978533267975, | |
| "eval_runtime": 2758.4684, | |
| "eval_samples_per_second": 5.682, | |
| "eval_steps_per_second": 0.355, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.010172579583987044, | |
| "grad_norm": 3.5027172565460205, | |
| "learning_rate": 4.983130050441149e-05, | |
| "loss": 0.5343, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.010223189432663597, | |
| "grad_norm": 7.1599836349487305, | |
| "learning_rate": 4.983045700693355e-05, | |
| "loss": 0.64, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.01027379928134015, | |
| "grad_norm": 6.179046630859375, | |
| "learning_rate": 4.982961350945561e-05, | |
| "loss": 0.6958, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.010324409130016702, | |
| "grad_norm": 6.452561855316162, | |
| "learning_rate": 4.982877001197767e-05, | |
| "loss": 0.9579, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.010375018978693253, | |
| "grad_norm": 6.45066499710083, | |
| "learning_rate": 4.982792651449972e-05, | |
| "loss": 0.8967, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.010425628827369806, | |
| "grad_norm": 8.554861068725586, | |
| "learning_rate": 4.982708301702178e-05, | |
| "loss": 1.1857, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.010476238676046358, | |
| "grad_norm": 3.6959705352783203, | |
| "learning_rate": 4.982623951954384e-05, | |
| "loss": 0.4889, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.010526848524722911, | |
| "grad_norm": 12.848132133483887, | |
| "learning_rate": 4.9825396022065894e-05, | |
| "loss": 0.64, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.010577458373399464, | |
| "grad_norm": 6.260450839996338, | |
| "learning_rate": 4.9824552524587954e-05, | |
| "loss": 1.0134, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.010628068222076016, | |
| "grad_norm": 6.700595378875732, | |
| "learning_rate": 4.982370902711001e-05, | |
| "loss": 0.8566, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.01067867807075257, | |
| "grad_norm": 6.211639404296875, | |
| "learning_rate": 4.982286552963207e-05, | |
| "loss": 0.9721, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.01072928791942912, | |
| "grad_norm": 7.872774600982666, | |
| "learning_rate": 4.982202203215413e-05, | |
| "loss": 0.4064, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.010779897768105673, | |
| "grad_norm": 9.730685234069824, | |
| "learning_rate": 4.982117853467618e-05, | |
| "loss": 1.044, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.010830507616782226, | |
| "grad_norm": 4.908615589141846, | |
| "learning_rate": 4.982033503719824e-05, | |
| "loss": 0.6847, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.010881117465458778, | |
| "grad_norm": 7.32842493057251, | |
| "learning_rate": 4.9819491539720295e-05, | |
| "loss": 0.8148, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.010931727314135331, | |
| "grad_norm": 4.537167072296143, | |
| "learning_rate": 4.9818648042242355e-05, | |
| "loss": 0.4907, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.010982337162811884, | |
| "grad_norm": 5.290032863616943, | |
| "learning_rate": 4.9817804544764416e-05, | |
| "loss": 0.6806, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.011032947011488436, | |
| "grad_norm": 6.21295166015625, | |
| "learning_rate": 4.981696104728647e-05, | |
| "loss": 0.6636, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.011083556860164987, | |
| "grad_norm": 5.210526466369629, | |
| "learning_rate": 4.981611754980853e-05, | |
| "loss": 0.7971, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.01113416670884154, | |
| "grad_norm": 40.482147216796875, | |
| "learning_rate": 4.981527405233058e-05, | |
| "loss": 0.7819, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.011184776557518093, | |
| "grad_norm": 6.940312385559082, | |
| "learning_rate": 4.981443055485264e-05, | |
| "loss": 0.8548, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.011235386406194646, | |
| "grad_norm": 10.026724815368652, | |
| "learning_rate": 4.98135870573747e-05, | |
| "loss": 0.5857, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.011285996254871198, | |
| "grad_norm": 10.645378112792969, | |
| "learning_rate": 4.9812743559896756e-05, | |
| "loss": 0.7426, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.011336606103547751, | |
| "grad_norm": 17.70381736755371, | |
| "learning_rate": 4.981190006241882e-05, | |
| "loss": 1.0465, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.011387215952224302, | |
| "grad_norm": 4.017563819885254, | |
| "learning_rate": 4.981105656494087e-05, | |
| "loss": 0.6183, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.011437825800900855, | |
| "grad_norm": 7.969531059265137, | |
| "learning_rate": 4.981021306746293e-05, | |
| "loss": 0.8661, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.011488435649577407, | |
| "grad_norm": 12.08060359954834, | |
| "learning_rate": 4.980936956998499e-05, | |
| "loss": 0.7461, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.01153904549825396, | |
| "grad_norm": 6.960525989532471, | |
| "learning_rate": 4.9808526072507044e-05, | |
| "loss": 0.5767, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.011589655346930513, | |
| "grad_norm": 10.034902572631836, | |
| "learning_rate": 4.9807682575029104e-05, | |
| "loss": 0.7012, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.011640265195607066, | |
| "grad_norm": 11.939443588256836, | |
| "learning_rate": 4.980683907755116e-05, | |
| "loss": 0.6601, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.011690875044283618, | |
| "grad_norm": 5.776790618896484, | |
| "learning_rate": 4.980599558007322e-05, | |
| "loss": 0.7514, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.01174148489296017, | |
| "grad_norm": 9.15820598602295, | |
| "learning_rate": 4.980515208259527e-05, | |
| "loss": 0.8779, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.011792094741636722, | |
| "grad_norm": 7.228218078613281, | |
| "learning_rate": 4.980430858511733e-05, | |
| "loss": 1.0643, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.011842704590313275, | |
| "grad_norm": 4.174966812133789, | |
| "learning_rate": 4.980346508763939e-05, | |
| "loss": 0.3511, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.011893314438989827, | |
| "grad_norm": 9.501602172851562, | |
| "learning_rate": 4.9802621590161445e-05, | |
| "loss": 0.8323, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.01194392428766638, | |
| "grad_norm": 4.438223361968994, | |
| "learning_rate": 4.9801778092683505e-05, | |
| "loss": 0.3112, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.011994534136342933, | |
| "grad_norm": 8.708006858825684, | |
| "learning_rate": 4.980093459520556e-05, | |
| "loss": 0.6483, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.012045143985019486, | |
| "grad_norm": 4.928430557250977, | |
| "learning_rate": 4.980009109772762e-05, | |
| "loss": 0.7263, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.012095753833696037, | |
| "grad_norm": 11.870718955993652, | |
| "learning_rate": 4.979924760024968e-05, | |
| "loss": 0.9143, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.01214636368237259, | |
| "grad_norm": 6.654867649078369, | |
| "learning_rate": 4.979840410277173e-05, | |
| "loss": 0.5257, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.012196973531049142, | |
| "grad_norm": 4.725414752960205, | |
| "learning_rate": 4.979756060529379e-05, | |
| "loss": 0.5919, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.012247583379725695, | |
| "grad_norm": 5.607127666473389, | |
| "learning_rate": 4.9796717107815846e-05, | |
| "loss": 0.5198, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.012298193228402247, | |
| "grad_norm": 5.8716864585876465, | |
| "learning_rate": 4.9795873610337906e-05, | |
| "loss": 0.7554, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.0123488030770788, | |
| "grad_norm": 7.410449504852295, | |
| "learning_rate": 4.9795030112859966e-05, | |
| "loss": 0.8967, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.012399412925755351, | |
| "grad_norm": 10.435539245605469, | |
| "learning_rate": 4.979418661538202e-05, | |
| "loss": 0.7446, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.012450022774431904, | |
| "grad_norm": 4.085461139678955, | |
| "learning_rate": 4.979334311790408e-05, | |
| "loss": 0.56, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.012500632623108457, | |
| "grad_norm": 7.522024154663086, | |
| "learning_rate": 4.9792499620426133e-05, | |
| "loss": 0.7217, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.01255124247178501, | |
| "grad_norm": 6.347813606262207, | |
| "learning_rate": 4.9791656122948194e-05, | |
| "loss": 0.5247, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.012601852320461562, | |
| "grad_norm": 8.447410583496094, | |
| "learning_rate": 4.9790812625470254e-05, | |
| "loss": 0.74, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.012652462169138115, | |
| "grad_norm": 5.143301486968994, | |
| "learning_rate": 4.978996912799231e-05, | |
| "loss": 0.5553, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.012703072017814667, | |
| "grad_norm": 5.69724702835083, | |
| "learning_rate": 4.978912563051437e-05, | |
| "loss": 0.9869, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.012753681866491218, | |
| "grad_norm": 3.5676660537719727, | |
| "learning_rate": 4.978828213303642e-05, | |
| "loss": 0.423, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.012804291715167771, | |
| "grad_norm": 5.91968297958374, | |
| "learning_rate": 4.978743863555848e-05, | |
| "loss": 0.7037, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.012854901563844324, | |
| "grad_norm": 9.412429809570312, | |
| "learning_rate": 4.978659513808054e-05, | |
| "loss": 1.1408, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.012905511412520877, | |
| "grad_norm": 8.342994689941406, | |
| "learning_rate": 4.9785751640602595e-05, | |
| "loss": 0.8661, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.01295612126119743, | |
| "grad_norm": 7.621118068695068, | |
| "learning_rate": 4.9784908143124655e-05, | |
| "loss": 0.677, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.013006731109873982, | |
| "grad_norm": 4.431066036224365, | |
| "learning_rate": 4.978406464564671e-05, | |
| "loss": 0.637, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.013057340958550535, | |
| "grad_norm": 9.607247352600098, | |
| "learning_rate": 4.978322114816877e-05, | |
| "loss": 0.5467, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.013107950807227086, | |
| "grad_norm": 8.333694458007812, | |
| "learning_rate": 4.978237765069083e-05, | |
| "loss": 1.0044, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.013158560655903638, | |
| "grad_norm": 3.5092146396636963, | |
| "learning_rate": 4.978153415321288e-05, | |
| "loss": 0.3496, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.013209170504580191, | |
| "grad_norm": 5.619466781616211, | |
| "learning_rate": 4.978069065573494e-05, | |
| "loss": 0.5608, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.013259780353256744, | |
| "grad_norm": 10.299610137939453, | |
| "learning_rate": 4.9779847158256996e-05, | |
| "loss": 0.7437, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.013310390201933297, | |
| "grad_norm": 9.14527416229248, | |
| "learning_rate": 4.9779003660779056e-05, | |
| "loss": 0.7961, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.01336100005060985, | |
| "grad_norm": 4.311985015869141, | |
| "learning_rate": 4.9778160163301116e-05, | |
| "loss": 0.5069, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.013411609899286402, | |
| "grad_norm": 7.6014862060546875, | |
| "learning_rate": 4.977731666582317e-05, | |
| "loss": 0.8133, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.013462219747962953, | |
| "grad_norm": 5.695495128631592, | |
| "learning_rate": 4.977647316834523e-05, | |
| "loss": 0.4957, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.013512829596639506, | |
| "grad_norm": 8.789469718933105, | |
| "learning_rate": 4.977562967086728e-05, | |
| "loss": 0.9009, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.013563439445316058, | |
| "grad_norm": 6.521892547607422, | |
| "learning_rate": 4.977478617338934e-05, | |
| "loss": 0.4392, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.013614049293992611, | |
| "grad_norm": 8.529374122619629, | |
| "learning_rate": 4.9773942675911404e-05, | |
| "loss": 0.507, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.013664659142669164, | |
| "grad_norm": 5.722716808319092, | |
| "learning_rate": 4.977309917843346e-05, | |
| "loss": 0.5071, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.013715268991345717, | |
| "grad_norm": 4.3265156745910645, | |
| "learning_rate": 4.977225568095552e-05, | |
| "loss": 0.7658, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.013765878840022268, | |
| "grad_norm": 5.346278190612793, | |
| "learning_rate": 4.977141218347757e-05, | |
| "loss": 0.5658, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.01381648868869882, | |
| "grad_norm": 7.274371147155762, | |
| "learning_rate": 4.977056868599963e-05, | |
| "loss": 0.8856, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.013867098537375373, | |
| "grad_norm": 5.354006767272949, | |
| "learning_rate": 4.9769725188521684e-05, | |
| "loss": 0.5224, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.013917708386051926, | |
| "grad_norm": 8.813762664794922, | |
| "learning_rate": 4.9768881691043744e-05, | |
| "loss": 1.1558, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.013968318234728478, | |
| "grad_norm": 6.957085132598877, | |
| "learning_rate": 4.9768038193565805e-05, | |
| "loss": 0.4142, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.014018928083405031, | |
| "grad_norm": 12.203534126281738, | |
| "learning_rate": 4.976719469608786e-05, | |
| "loss": 0.5945, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.014069537932081584, | |
| "grad_norm": 6.845212459564209, | |
| "learning_rate": 4.976635119860992e-05, | |
| "loss": 0.4359, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.014120147780758135, | |
| "grad_norm": 6.074872970581055, | |
| "learning_rate": 4.976550770113197e-05, | |
| "loss": 0.7704, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.014170757629434688, | |
| "grad_norm": 5.776919364929199, | |
| "learning_rate": 4.976466420365403e-05, | |
| "loss": 0.7799, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.01422136747811124, | |
| "grad_norm": 11.90626335144043, | |
| "learning_rate": 4.976382070617609e-05, | |
| "loss": 0.9279, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.014271977326787793, | |
| "grad_norm": 4.0126848220825195, | |
| "learning_rate": 4.9762977208698145e-05, | |
| "loss": 0.8148, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.014322587175464346, | |
| "grad_norm": 5.540651798248291, | |
| "learning_rate": 4.9762133711220206e-05, | |
| "loss": 0.528, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.014373197024140898, | |
| "grad_norm": 4.119876861572266, | |
| "learning_rate": 4.976129021374226e-05, | |
| "loss": 0.8705, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.014423806872817451, | |
| "grad_norm": 11.188005447387695, | |
| "learning_rate": 4.976044671626432e-05, | |
| "loss": 0.467, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.014474416721494002, | |
| "grad_norm": 6.426743030548096, | |
| "learning_rate": 4.975960321878638e-05, | |
| "loss": 0.5936, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.014525026570170555, | |
| "grad_norm": 6.039766788482666, | |
| "learning_rate": 4.975875972130843e-05, | |
| "loss": 0.7222, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.014575636418847107, | |
| "grad_norm": 12.856761932373047, | |
| "learning_rate": 4.975791622383049e-05, | |
| "loss": 0.501, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.01462624626752366, | |
| "grad_norm": 13.36010456085205, | |
| "learning_rate": 4.9757072726352547e-05, | |
| "loss": 0.473, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.014676856116200213, | |
| "grad_norm": 13.758210182189941, | |
| "learning_rate": 4.975622922887461e-05, | |
| "loss": 1.1569, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.014727465964876766, | |
| "grad_norm": 5.978826522827148, | |
| "learning_rate": 4.975538573139667e-05, | |
| "loss": 0.5853, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.014778075813553317, | |
| "grad_norm": 4.91432523727417, | |
| "learning_rate": 4.975454223391872e-05, | |
| "loss": 0.5073, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.01482868566222987, | |
| "grad_norm": 4.914419174194336, | |
| "learning_rate": 4.975369873644078e-05, | |
| "loss": 0.5638, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.014879295510906422, | |
| "grad_norm": 6.7935967445373535, | |
| "learning_rate": 4.9752855238962834e-05, | |
| "loss": 0.773, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.014929905359582975, | |
| "grad_norm": 5.500904083251953, | |
| "learning_rate": 4.9752011741484894e-05, | |
| "loss": 0.6985, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.014980515208259527, | |
| "grad_norm": 10.455467224121094, | |
| "learning_rate": 4.9751168244006954e-05, | |
| "loss": 0.5856, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.01503112505693608, | |
| "grad_norm": 14.375707626342773, | |
| "learning_rate": 4.975032474652901e-05, | |
| "loss": 0.8323, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.015081734905612633, | |
| "grad_norm": 8.137022972106934, | |
| "learning_rate": 4.974948124905107e-05, | |
| "loss": 0.7872, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.015132344754289184, | |
| "grad_norm": 4.684526443481445, | |
| "learning_rate": 4.974863775157312e-05, | |
| "loss": 0.7216, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.015182954602965737, | |
| "grad_norm": 6.151386737823486, | |
| "learning_rate": 4.974779425409518e-05, | |
| "loss": 0.7315, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.015182954602965737, | |
| "eval_cer": 0.21082101191590244, | |
| "eval_loss": 0.33135300874710083, | |
| "eval_runtime": 2751.8944, | |
| "eval_samples_per_second": 5.696, | |
| "eval_steps_per_second": 0.356, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01523356445164229, | |
| "grad_norm": 7.473769664764404, | |
| "learning_rate": 4.974695075661724e-05, | |
| "loss": 0.3884, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.015284174300318842, | |
| "grad_norm": 6.925198078155518, | |
| "learning_rate": 4.9746107259139295e-05, | |
| "loss": 0.6214, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.015334784148995395, | |
| "grad_norm": 7.6508684158325195, | |
| "learning_rate": 4.9745263761661355e-05, | |
| "loss": 0.3224, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.015385393997671947, | |
| "grad_norm": 8.625561714172363, | |
| "learning_rate": 4.974442026418341e-05, | |
| "loss": 1.1576, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.0154360038463485, | |
| "grad_norm": 4.799185276031494, | |
| "learning_rate": 4.974357676670547e-05, | |
| "loss": 0.4953, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.015486613695025051, | |
| "grad_norm": 6.022134780883789, | |
| "learning_rate": 4.974273326922753e-05, | |
| "loss": 0.417, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.015537223543701604, | |
| "grad_norm": 6.1436333656311035, | |
| "learning_rate": 4.974188977174958e-05, | |
| "loss": 0.7479, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.015587833392378157, | |
| "grad_norm": 11.182157516479492, | |
| "learning_rate": 4.974104627427164e-05, | |
| "loss": 1.1078, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.01563844324105471, | |
| "grad_norm": 7.718019485473633, | |
| "learning_rate": 4.9740202776793696e-05, | |
| "loss": 0.7953, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.015689053089731262, | |
| "grad_norm": 6.890071392059326, | |
| "learning_rate": 4.9739359279315756e-05, | |
| "loss": 0.8054, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.015739662938407815, | |
| "grad_norm": 9.046427726745605, | |
| "learning_rate": 4.973851578183782e-05, | |
| "loss": 0.8913, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.015790272787084367, | |
| "grad_norm": 7.701432704925537, | |
| "learning_rate": 4.973767228435987e-05, | |
| "loss": 1.2825, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.01584088263576092, | |
| "grad_norm": 6.6972808837890625, | |
| "learning_rate": 4.973682878688193e-05, | |
| "loss": 0.7063, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.015891492484437473, | |
| "grad_norm": 7.926259517669678, | |
| "learning_rate": 4.9735985289403984e-05, | |
| "loss": 0.6431, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.015942102333114026, | |
| "grad_norm": 6.506617546081543, | |
| "learning_rate": 4.9735141791926044e-05, | |
| "loss": 0.7283, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.015992712181790575, | |
| "grad_norm": 5.657668590545654, | |
| "learning_rate": 4.97342982944481e-05, | |
| "loss": 0.7906, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.016043322030467128, | |
| "grad_norm": 5.814841270446777, | |
| "learning_rate": 4.973345479697016e-05, | |
| "loss": 0.7872, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.01609393187914368, | |
| "grad_norm": 4.063594818115234, | |
| "learning_rate": 4.973261129949222e-05, | |
| "loss": 0.7104, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.016144541727820233, | |
| "grad_norm": 2.756787061691284, | |
| "learning_rate": 4.973176780201427e-05, | |
| "loss": 0.3609, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.016195151576496786, | |
| "grad_norm": 6.163934707641602, | |
| "learning_rate": 4.973092430453633e-05, | |
| "loss": 0.7657, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.01624576142517334, | |
| "grad_norm": 7.708083629608154, | |
| "learning_rate": 4.9730080807058385e-05, | |
| "loss": 0.7486, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.01629637127384989, | |
| "grad_norm": 6.693125247955322, | |
| "learning_rate": 4.9729237309580445e-05, | |
| "loss": 0.799, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.016346981122526444, | |
| "grad_norm": 8.108495712280273, | |
| "learning_rate": 4.9728393812102505e-05, | |
| "loss": 0.8542, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.016397590971202997, | |
| "grad_norm": 13.466230392456055, | |
| "learning_rate": 4.972755031462456e-05, | |
| "loss": 0.9259, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.01644820081987955, | |
| "grad_norm": 7.1850056648254395, | |
| "learning_rate": 4.972670681714662e-05, | |
| "loss": 0.4547, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.016498810668556102, | |
| "grad_norm": 10.240496635437012, | |
| "learning_rate": 4.972586331966867e-05, | |
| "loss": 0.7534, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.016549420517232655, | |
| "grad_norm": 7.284756183624268, | |
| "learning_rate": 4.972501982219073e-05, | |
| "loss": 0.7111, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.016600030365909207, | |
| "grad_norm": 4.935502052307129, | |
| "learning_rate": 4.972417632471279e-05, | |
| "loss": 0.587, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.016650640214585757, | |
| "grad_norm": 9.42276382446289, | |
| "learning_rate": 4.9723332827234846e-05, | |
| "loss": 0.6914, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.01670125006326231, | |
| "grad_norm": 8.303751945495605, | |
| "learning_rate": 4.9722489329756906e-05, | |
| "loss": 0.7424, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.016751859911938862, | |
| "grad_norm": 5.425510883331299, | |
| "learning_rate": 4.972164583227896e-05, | |
| "loss": 0.4959, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.016802469760615415, | |
| "grad_norm": 8.581243515014648, | |
| "learning_rate": 4.972080233480102e-05, | |
| "loss": 0.5844, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.016853079609291968, | |
| "grad_norm": 7.702110290527344, | |
| "learning_rate": 4.971995883732308e-05, | |
| "loss": 0.6744, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.01690368945796852, | |
| "grad_norm": 5.569732189178467, | |
| "learning_rate": 4.9719115339845133e-05, | |
| "loss": 0.5237, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.016954299306645073, | |
| "grad_norm": 7.927607536315918, | |
| "learning_rate": 4.9718271842367194e-05, | |
| "loss": 0.6969, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.017004909155321626, | |
| "grad_norm": 6.792113304138184, | |
| "learning_rate": 4.971742834488925e-05, | |
| "loss": 0.4953, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.01705551900399818, | |
| "grad_norm": 41.776954650878906, | |
| "learning_rate": 4.971658484741131e-05, | |
| "loss": 0.5495, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.01710612885267473, | |
| "grad_norm": 5.085512638092041, | |
| "learning_rate": 4.971574134993337e-05, | |
| "loss": 0.6303, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.017156738701351284, | |
| "grad_norm": 5.323431491851807, | |
| "learning_rate": 4.971489785245542e-05, | |
| "loss": 0.7973, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.017207348550027837, | |
| "grad_norm": 5.844650745391846, | |
| "learning_rate": 4.971405435497748e-05, | |
| "loss": 1.0397, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.01725795839870439, | |
| "grad_norm": 1.8864692449569702, | |
| "learning_rate": 4.9713210857499535e-05, | |
| "loss": 0.3274, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.017308568247380942, | |
| "grad_norm": 3.4474141597747803, | |
| "learning_rate": 4.9712367360021595e-05, | |
| "loss": 0.5992, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.01735917809605749, | |
| "grad_norm": 5.180785655975342, | |
| "learning_rate": 4.9711523862543655e-05, | |
| "loss": 0.6462, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.017409787944734044, | |
| "grad_norm": 5.768479347229004, | |
| "learning_rate": 4.971068036506571e-05, | |
| "loss": 0.6288, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.017460397793410597, | |
| "grad_norm": 5.524656772613525, | |
| "learning_rate": 4.970983686758777e-05, | |
| "loss": 0.6045, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.01751100764208715, | |
| "grad_norm": 6.222484111785889, | |
| "learning_rate": 4.970899337010982e-05, | |
| "loss": 0.4882, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.017561617490763702, | |
| "grad_norm": 2.960340738296509, | |
| "learning_rate": 4.970814987263188e-05, | |
| "loss": 0.3803, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.017612227339440255, | |
| "grad_norm": 3.2236385345458984, | |
| "learning_rate": 4.970730637515394e-05, | |
| "loss": 0.5617, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.017662837188116808, | |
| "grad_norm": 8.282496452331543, | |
| "learning_rate": 4.9706462877675996e-05, | |
| "loss": 0.6835, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.01771344703679336, | |
| "grad_norm": 7.472006797790527, | |
| "learning_rate": 4.9705619380198056e-05, | |
| "loss": 0.4747, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.017764056885469913, | |
| "grad_norm": 6.8331780433654785, | |
| "learning_rate": 4.970477588272011e-05, | |
| "loss": 0.4765, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.017814666734146466, | |
| "grad_norm": 4.116718292236328, | |
| "learning_rate": 4.970393238524217e-05, | |
| "loss": 0.4177, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.01786527658282302, | |
| "grad_norm": 14.779850959777832, | |
| "learning_rate": 4.970308888776423e-05, | |
| "loss": 0.5939, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.01791588643149957, | |
| "grad_norm": 10.066930770874023, | |
| "learning_rate": 4.970224539028628e-05, | |
| "loss": 0.7392, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.017966496280176124, | |
| "grad_norm": 5.495025634765625, | |
| "learning_rate": 4.9701401892808343e-05, | |
| "loss": 0.6957, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.018017106128852673, | |
| "grad_norm": 9.583708763122559, | |
| "learning_rate": 4.97005583953304e-05, | |
| "loss": 0.8032, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.018067715977529226, | |
| "grad_norm": 6.409923076629639, | |
| "learning_rate": 4.969971489785246e-05, | |
| "loss": 0.9505, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.01811832582620578, | |
| "grad_norm": 8.952066421508789, | |
| "learning_rate": 4.969887140037451e-05, | |
| "loss": 0.7739, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.01816893567488233, | |
| "grad_norm": 6.334309101104736, | |
| "learning_rate": 4.969802790289657e-05, | |
| "loss": 1.0117, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.018219545523558884, | |
| "grad_norm": 8.889698028564453, | |
| "learning_rate": 4.969718440541863e-05, | |
| "loss": 0.6535, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.018270155372235437, | |
| "grad_norm": 2.9195971488952637, | |
| "learning_rate": 4.9696340907940684e-05, | |
| "loss": 0.3021, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.01832076522091199, | |
| "grad_norm": 8.154908180236816, | |
| "learning_rate": 4.9695497410462744e-05, | |
| "loss": 0.8477, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.018371375069588542, | |
| "grad_norm": 3.179515838623047, | |
| "learning_rate": 4.96946539129848e-05, | |
| "loss": 0.5856, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.018421984918265095, | |
| "grad_norm": 7.491313457489014, | |
| "learning_rate": 4.969381041550686e-05, | |
| "loss": 0.8113, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.018472594766941648, | |
| "grad_norm": 6.968503475189209, | |
| "learning_rate": 4.969296691802892e-05, | |
| "loss": 0.6095, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.0185232046156182, | |
| "grad_norm": 7.4488348960876465, | |
| "learning_rate": 4.969212342055097e-05, | |
| "loss": 1.0515, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.018573814464294753, | |
| "grad_norm": 2.959810733795166, | |
| "learning_rate": 4.969127992307303e-05, | |
| "loss": 0.5783, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.018624424312971306, | |
| "grad_norm": 9.092787742614746, | |
| "learning_rate": 4.9690436425595085e-05, | |
| "loss": 0.878, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.01867503416164786, | |
| "grad_norm": 7.564940929412842, | |
| "learning_rate": 4.9689592928117146e-05, | |
| "loss": 0.8122, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.018725644010324408, | |
| "grad_norm": 7.550844192504883, | |
| "learning_rate": 4.9688749430639206e-05, | |
| "loss": 0.4964, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.01877625385900096, | |
| "grad_norm": 11.271805763244629, | |
| "learning_rate": 4.968790593316126e-05, | |
| "loss": 0.9631, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.018826863707677513, | |
| "grad_norm": 13.20101547241211, | |
| "learning_rate": 4.968706243568332e-05, | |
| "loss": 0.6274, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.018877473556354066, | |
| "grad_norm": 5.18681526184082, | |
| "learning_rate": 4.968621893820537e-05, | |
| "loss": 0.6049, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.01892808340503062, | |
| "grad_norm": 3.5832290649414062, | |
| "learning_rate": 4.968537544072743e-05, | |
| "loss": 0.363, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.01897869325370717, | |
| "grad_norm": 4.764203071594238, | |
| "learning_rate": 4.968453194324949e-05, | |
| "loss": 0.4824, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.019029303102383724, | |
| "grad_norm": 3.9694879055023193, | |
| "learning_rate": 4.9683688445771547e-05, | |
| "loss": 0.6074, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.019079912951060277, | |
| "grad_norm": 4.219832897186279, | |
| "learning_rate": 4.968284494829361e-05, | |
| "loss": 0.3649, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.01913052279973683, | |
| "grad_norm": 25.03742027282715, | |
| "learning_rate": 4.968200145081566e-05, | |
| "loss": 0.742, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.019181132648413382, | |
| "grad_norm": 8.844772338867188, | |
| "learning_rate": 4.968115795333772e-05, | |
| "loss": 0.6859, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.019231742497089935, | |
| "grad_norm": 7.3545918464660645, | |
| "learning_rate": 4.968031445585978e-05, | |
| "loss": 0.5395, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.019282352345766487, | |
| "grad_norm": 3.499608278274536, | |
| "learning_rate": 4.9679470958381834e-05, | |
| "loss": 0.613, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.01933296219444304, | |
| "grad_norm": 4.7259440422058105, | |
| "learning_rate": 4.9678627460903894e-05, | |
| "loss": 0.7538, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.01938357204311959, | |
| "grad_norm": 5.475869178771973, | |
| "learning_rate": 4.967778396342595e-05, | |
| "loss": 0.7714, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.019434181891796142, | |
| "grad_norm": 18.313730239868164, | |
| "learning_rate": 4.967694046594801e-05, | |
| "loss": 0.9697, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.019484791740472695, | |
| "grad_norm": 8.159904479980469, | |
| "learning_rate": 4.967609696847007e-05, | |
| "loss": 0.5248, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.019535401589149248, | |
| "grad_norm": 5.177513122558594, | |
| "learning_rate": 4.967525347099212e-05, | |
| "loss": 0.4911, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.0195860114378258, | |
| "grad_norm": 4.758183479309082, | |
| "learning_rate": 4.967440997351418e-05, | |
| "loss": 0.7151, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.019636621286502353, | |
| "grad_norm": 8.288613319396973, | |
| "learning_rate": 4.9673566476036235e-05, | |
| "loss": 0.49, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.019687231135178906, | |
| "grad_norm": 8.448434829711914, | |
| "learning_rate": 4.9672722978558295e-05, | |
| "loss": 0.7352, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.01973784098385546, | |
| "grad_norm": 11.22861385345459, | |
| "learning_rate": 4.9671879481080355e-05, | |
| "loss": 0.4209, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.01978845083253201, | |
| "grad_norm": 8.725863456726074, | |
| "learning_rate": 4.967103598360241e-05, | |
| "loss": 0.5673, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.019839060681208564, | |
| "grad_norm": 6.322774410247803, | |
| "learning_rate": 4.967019248612447e-05, | |
| "loss": 0.6288, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.019889670529885117, | |
| "grad_norm": 3.711097240447998, | |
| "learning_rate": 4.966934898864652e-05, | |
| "loss": 0.6343, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.01994028037856167, | |
| "grad_norm": 7.519350528717041, | |
| "learning_rate": 4.966850549116858e-05, | |
| "loss": 0.7308, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.019990890227238222, | |
| "grad_norm": 7.28798246383667, | |
| "learning_rate": 4.966766199369064e-05, | |
| "loss": 0.5802, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.02004150007591477, | |
| "grad_norm": 5.8284783363342285, | |
| "learning_rate": 4.9666818496212696e-05, | |
| "loss": 0.5121, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.020092109924591324, | |
| "grad_norm": 6.361229419708252, | |
| "learning_rate": 4.9665974998734757e-05, | |
| "loss": 0.7215, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.020142719773267877, | |
| "grad_norm": 5.134431838989258, | |
| "learning_rate": 4.966513150125681e-05, | |
| "loss": 0.4415, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.02019332962194443, | |
| "grad_norm": 6.047237873077393, | |
| "learning_rate": 4.966428800377887e-05, | |
| "loss": 0.8233, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.020243939470620982, | |
| "grad_norm": 3.95216965675354, | |
| "learning_rate": 4.9663444506300924e-05, | |
| "loss": 0.3292, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.020243939470620982, | |
| "eval_cer": 0.21470199846112256, | |
| "eval_loss": 0.33481982350349426, | |
| "eval_runtime": 2771.1883, | |
| "eval_samples_per_second": 5.656, | |
| "eval_steps_per_second": 0.354, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.020294549319297535, | |
| "grad_norm": 11.878438949584961, | |
| "learning_rate": 4.9662601008822984e-05, | |
| "loss": 0.6688, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.020345159167974088, | |
| "grad_norm": 4.3031392097473145, | |
| "learning_rate": 4.9661757511345044e-05, | |
| "loss": 0.5529, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.02039576901665064, | |
| "grad_norm": 3.877115249633789, | |
| "learning_rate": 4.96609140138671e-05, | |
| "loss": 0.4497, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.020446378865327193, | |
| "grad_norm": 4.481441497802734, | |
| "learning_rate": 4.966007051638916e-05, | |
| "loss": 0.5141, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.020496988714003746, | |
| "grad_norm": 8.904651641845703, | |
| "learning_rate": 4.965922701891121e-05, | |
| "loss": 0.5108, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.0205475985626803, | |
| "grad_norm": 7.119351863861084, | |
| "learning_rate": 4.965838352143327e-05, | |
| "loss": 1.0192, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.02059820841135685, | |
| "grad_norm": 9.75696849822998, | |
| "learning_rate": 4.965754002395533e-05, | |
| "loss": 0.8766, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.020648818260033404, | |
| "grad_norm": 3.441471576690674, | |
| "learning_rate": 4.9656696526477385e-05, | |
| "loss": 0.4397, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.020699428108709957, | |
| "grad_norm": 5.399097442626953, | |
| "learning_rate": 4.9655853028999445e-05, | |
| "loss": 0.8645, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.020750037957386506, | |
| "grad_norm": 8.58830738067627, | |
| "learning_rate": 4.96550095315215e-05, | |
| "loss": 0.6172, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.02080064780606306, | |
| "grad_norm": 4.000862121582031, | |
| "learning_rate": 4.965416603404356e-05, | |
| "loss": 0.4244, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.02085125765473961, | |
| "grad_norm": 5.518575668334961, | |
| "learning_rate": 4.965332253656562e-05, | |
| "loss": 0.7934, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.020901867503416164, | |
| "grad_norm": 5.466125011444092, | |
| "learning_rate": 4.965247903908767e-05, | |
| "loss": 0.5228, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.020952477352092717, | |
| "grad_norm": 8.060519218444824, | |
| "learning_rate": 4.965163554160973e-05, | |
| "loss": 0.7465, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.02100308720076927, | |
| "grad_norm": 7.969659805297852, | |
| "learning_rate": 4.9650792044131786e-05, | |
| "loss": 0.7079, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.021053697049445822, | |
| "grad_norm": 5.721604347229004, | |
| "learning_rate": 4.9649948546653846e-05, | |
| "loss": 0.5805, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.021104306898122375, | |
| "grad_norm": 5.849686145782471, | |
| "learning_rate": 4.9649105049175906e-05, | |
| "loss": 0.7729, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.021154916746798928, | |
| "grad_norm": 3.3855998516082764, | |
| "learning_rate": 4.964826155169796e-05, | |
| "loss": 0.7193, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.02120552659547548, | |
| "grad_norm": 3.7108068466186523, | |
| "learning_rate": 4.964741805422002e-05, | |
| "loss": 0.5297, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.021256136444152033, | |
| "grad_norm": 7.068669319152832, | |
| "learning_rate": 4.964657455674207e-05, | |
| "loss": 0.8856, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.021306746292828586, | |
| "grad_norm": 3.230989694595337, | |
| "learning_rate": 4.9645731059264133e-05, | |
| "loss": 0.4015, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.02135735614150514, | |
| "grad_norm": 3.4090418815612793, | |
| "learning_rate": 4.9644887561786194e-05, | |
| "loss": 0.2911, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.021407965990181688, | |
| "grad_norm": 4.298464298248291, | |
| "learning_rate": 4.964404406430825e-05, | |
| "loss": 0.6512, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.02145857583885824, | |
| "grad_norm": 2.939181327819824, | |
| "learning_rate": 4.964320056683031e-05, | |
| "loss": 0.4267, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.021509185687534793, | |
| "grad_norm": 6.4090728759765625, | |
| "learning_rate": 4.964235706935236e-05, | |
| "loss": 0.5932, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.021559795536211346, | |
| "grad_norm": 10.887248992919922, | |
| "learning_rate": 4.964151357187442e-05, | |
| "loss": 0.8047, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.0216104053848879, | |
| "grad_norm": 5.774937629699707, | |
| "learning_rate": 4.964067007439648e-05, | |
| "loss": 0.7478, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.02166101523356445, | |
| "grad_norm": 4.135079860687256, | |
| "learning_rate": 4.9639826576918535e-05, | |
| "loss": 0.5991, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.021711625082241004, | |
| "grad_norm": 5.668390274047852, | |
| "learning_rate": 4.9638983079440595e-05, | |
| "loss": 0.4716, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.021762234930917557, | |
| "grad_norm": 2.0646257400512695, | |
| "learning_rate": 4.963813958196265e-05, | |
| "loss": 0.3783, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.02181284477959411, | |
| "grad_norm": 5.5543532371521, | |
| "learning_rate": 4.963729608448471e-05, | |
| "loss": 0.8937, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.021863454628270662, | |
| "grad_norm": 4.02618408203125, | |
| "learning_rate": 4.963645258700677e-05, | |
| "loss": 0.4268, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.021914064476947215, | |
| "grad_norm": 5.774693012237549, | |
| "learning_rate": 4.963560908952882e-05, | |
| "loss": 0.7588, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.021964674325623768, | |
| "grad_norm": 3.130143880844116, | |
| "learning_rate": 4.963476559205088e-05, | |
| "loss": 0.4623, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.02201528417430032, | |
| "grad_norm": 7.7838521003723145, | |
| "learning_rate": 4.9633922094572936e-05, | |
| "loss": 0.8172, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.022065894022976873, | |
| "grad_norm": 7.278140544891357, | |
| "learning_rate": 4.9633078597094996e-05, | |
| "loss": 0.4937, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.022116503871653422, | |
| "grad_norm": 3.3574812412261963, | |
| "learning_rate": 4.9632235099617056e-05, | |
| "loss": 0.4263, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.022167113720329975, | |
| "grad_norm": 5.792145729064941, | |
| "learning_rate": 4.963139160213911e-05, | |
| "loss": 0.6564, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.022217723569006528, | |
| "grad_norm": 4.801455020904541, | |
| "learning_rate": 4.963054810466117e-05, | |
| "loss": 0.4392, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.02226833341768308, | |
| "grad_norm": 5.278634548187256, | |
| "learning_rate": 4.962970460718322e-05, | |
| "loss": 0.6453, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.022318943266359633, | |
| "grad_norm": 4.173251628875732, | |
| "learning_rate": 4.962886110970528e-05, | |
| "loss": 0.5477, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.022369553115036186, | |
| "grad_norm": 3.603672981262207, | |
| "learning_rate": 4.962801761222734e-05, | |
| "loss": 0.8842, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.02242016296371274, | |
| "grad_norm": 18.358938217163086, | |
| "learning_rate": 4.96271741147494e-05, | |
| "loss": 0.8393, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.02247077281238929, | |
| "grad_norm": 6.532278537750244, | |
| "learning_rate": 4.962633061727146e-05, | |
| "loss": 0.5507, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.022521382661065844, | |
| "grad_norm": 6.95924711227417, | |
| "learning_rate": 4.962548711979351e-05, | |
| "loss": 0.6536, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.022571992509742397, | |
| "grad_norm": 3.4727678298950195, | |
| "learning_rate": 4.962464362231557e-05, | |
| "loss": 0.435, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.02262260235841895, | |
| "grad_norm": 5.473514080047607, | |
| "learning_rate": 4.9623800124837624e-05, | |
| "loss": 0.3681, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.022673212207095502, | |
| "grad_norm": 7.168368339538574, | |
| "learning_rate": 4.9622956627359684e-05, | |
| "loss": 0.6088, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.022723822055772055, | |
| "grad_norm": 9.777496337890625, | |
| "learning_rate": 4.9622113129881744e-05, | |
| "loss": 0.8791, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.022774431904448604, | |
| "grad_norm": 4.331769943237305, | |
| "learning_rate": 4.96212696324038e-05, | |
| "loss": 0.4703, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.022825041753125157, | |
| "grad_norm": 11.051033973693848, | |
| "learning_rate": 4.962042613492586e-05, | |
| "loss": 0.6771, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.02287565160180171, | |
| "grad_norm": 4.339256763458252, | |
| "learning_rate": 4.961958263744791e-05, | |
| "loss": 0.3235, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.022926261450478262, | |
| "grad_norm": 9.73657512664795, | |
| "learning_rate": 4.961873913996997e-05, | |
| "loss": 0.8238, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.022976871299154815, | |
| "grad_norm": 8.267867088317871, | |
| "learning_rate": 4.961789564249203e-05, | |
| "loss": 0.4584, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.023027481147831368, | |
| "grad_norm": 4.065835952758789, | |
| "learning_rate": 4.9617052145014085e-05, | |
| "loss": 0.5364, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.02307809099650792, | |
| "grad_norm": 9.213961601257324, | |
| "learning_rate": 4.9616208647536146e-05, | |
| "loss": 0.7232, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.023128700845184473, | |
| "grad_norm": 3.7316653728485107, | |
| "learning_rate": 4.96153651500582e-05, | |
| "loss": 0.417, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.023179310693861026, | |
| "grad_norm": 4.829885959625244, | |
| "learning_rate": 4.961452165258026e-05, | |
| "loss": 0.4171, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.02322992054253758, | |
| "grad_norm": 13.912676811218262, | |
| "learning_rate": 4.961367815510232e-05, | |
| "loss": 0.5439, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.02328053039121413, | |
| "grad_norm": 6.918741226196289, | |
| "learning_rate": 4.961283465762437e-05, | |
| "loss": 0.6434, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.023331140239890684, | |
| "grad_norm": 7.197675704956055, | |
| "learning_rate": 4.961199116014643e-05, | |
| "loss": 0.7114, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.023381750088567237, | |
| "grad_norm": 6.958329200744629, | |
| "learning_rate": 4.9611147662668486e-05, | |
| "loss": 0.6549, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.02343235993724379, | |
| "grad_norm": 7.052434921264648, | |
| "learning_rate": 4.9610304165190547e-05, | |
| "loss": 0.8423, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.02348296978592034, | |
| "grad_norm": 4.447729110717773, | |
| "learning_rate": 4.960946066771261e-05, | |
| "loss": 0.4996, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.02353357963459689, | |
| "grad_norm": 8.922036170959473, | |
| "learning_rate": 4.960861717023466e-05, | |
| "loss": 0.7087, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.023584189483273444, | |
| "grad_norm": 5.072062969207764, | |
| "learning_rate": 4.960777367275672e-05, | |
| "loss": 0.6203, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.023634799331949997, | |
| "grad_norm": 2.5875844955444336, | |
| "learning_rate": 4.9606930175278774e-05, | |
| "loss": 0.3663, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.02368540918062655, | |
| "grad_norm": 5.000091552734375, | |
| "learning_rate": 4.9606086677800834e-05, | |
| "loss": 0.2929, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.023736019029303102, | |
| "grad_norm": 5.237270355224609, | |
| "learning_rate": 4.9605243180322894e-05, | |
| "loss": 0.632, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.023786628877979655, | |
| "grad_norm": 9.747302055358887, | |
| "learning_rate": 4.960439968284495e-05, | |
| "loss": 0.7382, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.023837238726656208, | |
| "grad_norm": 7.886781215667725, | |
| "learning_rate": 4.960355618536701e-05, | |
| "loss": 0.6935, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.02388784857533276, | |
| "grad_norm": 6.9037885665893555, | |
| "learning_rate": 4.960271268788906e-05, | |
| "loss": 0.631, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.023938458424009313, | |
| "grad_norm": 4.556064128875732, | |
| "learning_rate": 4.960186919041112e-05, | |
| "loss": 0.5162, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.023989068272685866, | |
| "grad_norm": 6.1615519523620605, | |
| "learning_rate": 4.960102569293318e-05, | |
| "loss": 0.6383, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.02403967812136242, | |
| "grad_norm": 142.0865020751953, | |
| "learning_rate": 4.9600182195455235e-05, | |
| "loss": 0.7586, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.02409028797003897, | |
| "grad_norm": 10.337366104125977, | |
| "learning_rate": 4.9599338697977295e-05, | |
| "loss": 0.5753, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.02414089781871552, | |
| "grad_norm": 6.216306686401367, | |
| "learning_rate": 4.959849520049935e-05, | |
| "loss": 0.6553, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.024191507667392073, | |
| "grad_norm": 2.8385775089263916, | |
| "learning_rate": 4.959765170302141e-05, | |
| "loss": 0.3002, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.024242117516068626, | |
| "grad_norm": 5.520168304443359, | |
| "learning_rate": 4.959680820554347e-05, | |
| "loss": 0.6907, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.02429272736474518, | |
| "grad_norm": 3.9114444255828857, | |
| "learning_rate": 4.959596470806552e-05, | |
| "loss": 0.419, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.02434333721342173, | |
| "grad_norm": 6.165211200714111, | |
| "learning_rate": 4.959512121058758e-05, | |
| "loss": 0.6344, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.024393947062098284, | |
| "grad_norm": 4.263137340545654, | |
| "learning_rate": 4.9594277713109636e-05, | |
| "loss": 0.5867, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.024444556910774837, | |
| "grad_norm": 2.720306873321533, | |
| "learning_rate": 4.9593434215631696e-05, | |
| "loss": 0.3657, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.02449516675945139, | |
| "grad_norm": 6.873605251312256, | |
| "learning_rate": 4.959259071815375e-05, | |
| "loss": 0.6407, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.024545776608127942, | |
| "grad_norm": 16.427352905273438, | |
| "learning_rate": 4.959174722067581e-05, | |
| "loss": 0.8508, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.024596386456804495, | |
| "grad_norm": 8.866124153137207, | |
| "learning_rate": 4.959090372319787e-05, | |
| "loss": 0.803, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.024646996305481048, | |
| "grad_norm": 5.541032791137695, | |
| "learning_rate": 4.9590060225719924e-05, | |
| "loss": 0.6485, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.0246976061541576, | |
| "grad_norm": 2.8217546939849854, | |
| "learning_rate": 4.9589216728241984e-05, | |
| "loss": 0.3261, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.024748216002834153, | |
| "grad_norm": 6.076355457305908, | |
| "learning_rate": 4.958837323076404e-05, | |
| "loss": 0.5189, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.024798825851510702, | |
| "grad_norm": 2.687714099884033, | |
| "learning_rate": 4.95875297332861e-05, | |
| "loss": 0.3242, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.024849435700187255, | |
| "grad_norm": 6.32763671875, | |
| "learning_rate": 4.958668623580816e-05, | |
| "loss": 0.6136, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.024900045548863808, | |
| "grad_norm": 11.270010948181152, | |
| "learning_rate": 4.958584273833021e-05, | |
| "loss": 0.8674, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.02495065539754036, | |
| "grad_norm": 5.883991718292236, | |
| "learning_rate": 4.958499924085227e-05, | |
| "loss": 0.5242, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.025001265246216913, | |
| "grad_norm": 6.999844551086426, | |
| "learning_rate": 4.9584155743374325e-05, | |
| "loss": 0.5351, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.025051875094893466, | |
| "grad_norm": 6.340963363647461, | |
| "learning_rate": 4.9583312245896385e-05, | |
| "loss": 0.5624, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.02510248494357002, | |
| "grad_norm": 7.818021774291992, | |
| "learning_rate": 4.9582468748418445e-05, | |
| "loss": 0.9044, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.02515309479224657, | |
| "grad_norm": 6.447050094604492, | |
| "learning_rate": 4.95816252509405e-05, | |
| "loss": 0.7352, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.025203704640923124, | |
| "grad_norm": 4.902888774871826, | |
| "learning_rate": 4.958078175346256e-05, | |
| "loss": 0.4516, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.025254314489599677, | |
| "grad_norm": 71.4312973022461, | |
| "learning_rate": 4.957993825598461e-05, | |
| "loss": 0.6593, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.02530492433827623, | |
| "grad_norm": 5.481997489929199, | |
| "learning_rate": 4.957909475850667e-05, | |
| "loss": 0.4801, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02530492433827623, | |
| "eval_cer": 0.20989040697069894, | |
| "eval_loss": 0.3260483741760254, | |
| "eval_runtime": 2649.7137, | |
| "eval_samples_per_second": 5.915, | |
| "eval_steps_per_second": 0.37, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.025355534186952782, | |
| "grad_norm": 13.54730224609375, | |
| "learning_rate": 4.957825126102873e-05, | |
| "loss": 0.5685, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.025406144035629335, | |
| "grad_norm": 8.101643562316895, | |
| "learning_rate": 4.9577407763550786e-05, | |
| "loss": 0.6376, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.025456753884305888, | |
| "grad_norm": 5.483541965484619, | |
| "learning_rate": 4.9576564266072846e-05, | |
| "loss": 0.5417, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.025507363732982437, | |
| "grad_norm": 5.6926493644714355, | |
| "learning_rate": 4.95757207685949e-05, | |
| "loss": 0.5145, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.02555797358165899, | |
| "grad_norm": 4.287508487701416, | |
| "learning_rate": 4.957487727111696e-05, | |
| "loss": 0.5817, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.025608583430335542, | |
| "grad_norm": 3.4692742824554443, | |
| "learning_rate": 4.957403377363902e-05, | |
| "loss": 0.4781, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.025659193279012095, | |
| "grad_norm": 6.066647052764893, | |
| "learning_rate": 4.957319027616107e-05, | |
| "loss": 0.5475, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.025709803127688648, | |
| "grad_norm": 3.1936872005462646, | |
| "learning_rate": 4.9572346778683134e-05, | |
| "loss": 0.6294, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.0257604129763652, | |
| "grad_norm": 10.364377975463867, | |
| "learning_rate": 4.957150328120519e-05, | |
| "loss": 0.7561, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.025811022825041753, | |
| "grad_norm": 5.162211894989014, | |
| "learning_rate": 4.957065978372725e-05, | |
| "loss": 0.5821, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.025861632673718306, | |
| "grad_norm": 8.685132026672363, | |
| "learning_rate": 4.956981628624931e-05, | |
| "loss": 0.7048, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.02591224252239486, | |
| "grad_norm": 4.4980316162109375, | |
| "learning_rate": 4.956897278877136e-05, | |
| "loss": 0.5405, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.02596285237107141, | |
| "grad_norm": 5.45166540145874, | |
| "learning_rate": 4.956812929129342e-05, | |
| "loss": 0.3885, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.026013462219747964, | |
| "grad_norm": 7.379059314727783, | |
| "learning_rate": 4.9567285793815474e-05, | |
| "loss": 0.5148, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.026064072068424517, | |
| "grad_norm": 4.766157150268555, | |
| "learning_rate": 4.9566442296337535e-05, | |
| "loss": 0.7287, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.02611468191710107, | |
| "grad_norm": 7.131243705749512, | |
| "learning_rate": 4.9565598798859595e-05, | |
| "loss": 0.6868, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.02616529176577762, | |
| "grad_norm": 9.707527160644531, | |
| "learning_rate": 4.956475530138165e-05, | |
| "loss": 0.6592, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.02621590161445417, | |
| "grad_norm": 5.630888938903809, | |
| "learning_rate": 4.956391180390371e-05, | |
| "loss": 0.5028, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.026266511463130724, | |
| "grad_norm": 3.7485404014587402, | |
| "learning_rate": 4.956306830642576e-05, | |
| "loss": 0.3452, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.026317121311807277, | |
| "grad_norm": 6.3416643142700195, | |
| "learning_rate": 4.956222480894782e-05, | |
| "loss": 0.4316, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.02636773116048383, | |
| "grad_norm": 8.458013534545898, | |
| "learning_rate": 4.956138131146988e-05, | |
| "loss": 0.8142, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.026418341009160382, | |
| "grad_norm": 9.704322814941406, | |
| "learning_rate": 4.9560537813991936e-05, | |
| "loss": 0.7614, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.026468950857836935, | |
| "grad_norm": 3.311298370361328, | |
| "learning_rate": 4.9559694316513996e-05, | |
| "loss": 0.5417, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.026519560706513488, | |
| "grad_norm": 7.961212635040283, | |
| "learning_rate": 4.955885081903605e-05, | |
| "loss": 0.6401, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.02657017055519004, | |
| "grad_norm": 6.631721496582031, | |
| "learning_rate": 4.955800732155811e-05, | |
| "loss": 0.5357, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.026620780403866593, | |
| "grad_norm": 5.143334865570068, | |
| "learning_rate": 4.955716382408016e-05, | |
| "loss": 0.545, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.026671390252543146, | |
| "grad_norm": 8.77175235748291, | |
| "learning_rate": 4.955632032660222e-05, | |
| "loss": 0.5938, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.0267220001012197, | |
| "grad_norm": 10.350188255310059, | |
| "learning_rate": 4.955547682912428e-05, | |
| "loss": 0.9662, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.02677260994989625, | |
| "grad_norm": 6.271733283996582, | |
| "learning_rate": 4.955463333164634e-05, | |
| "loss": 0.5715, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.026823219798572804, | |
| "grad_norm": 5.548452854156494, | |
| "learning_rate": 4.95537898341684e-05, | |
| "loss": 0.5213, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.026873829647249353, | |
| "grad_norm": 5.460413455963135, | |
| "learning_rate": 4.955294633669045e-05, | |
| "loss": 0.3746, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.026924439495925906, | |
| "grad_norm": 10.801025390625, | |
| "learning_rate": 4.955210283921251e-05, | |
| "loss": 0.66, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.02697504934460246, | |
| "grad_norm": 35.335445404052734, | |
| "learning_rate": 4.955125934173457e-05, | |
| "loss": 0.5122, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.02702565919327901, | |
| "grad_norm": 3.974865436553955, | |
| "learning_rate": 4.9550415844256624e-05, | |
| "loss": 0.3642, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.027076269041955564, | |
| "grad_norm": 6.508527755737305, | |
| "learning_rate": 4.9549572346778684e-05, | |
| "loss": 0.6126, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.027126878890632117, | |
| "grad_norm": 11.70407772064209, | |
| "learning_rate": 4.954872884930074e-05, | |
| "loss": 0.4939, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.02717748873930867, | |
| "grad_norm": 9.648119926452637, | |
| "learning_rate": 4.95478853518228e-05, | |
| "loss": 0.6462, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.027228098587985222, | |
| "grad_norm": 8.660693168640137, | |
| "learning_rate": 4.954704185434486e-05, | |
| "loss": 0.5923, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.027278708436661775, | |
| "grad_norm": 17.504438400268555, | |
| "learning_rate": 4.954619835686691e-05, | |
| "loss": 0.9972, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.027329318285338328, | |
| "grad_norm": 6.019506454467773, | |
| "learning_rate": 4.954535485938897e-05, | |
| "loss": 0.5884, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.02737992813401488, | |
| "grad_norm": 11.403207778930664, | |
| "learning_rate": 4.9544511361911025e-05, | |
| "loss": 0.7651, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.027430537982691433, | |
| "grad_norm": 3.842545747756958, | |
| "learning_rate": 4.9543667864433085e-05, | |
| "loss": 0.3572, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.027481147831367986, | |
| "grad_norm": 4.691946029663086, | |
| "learning_rate": 4.9542824366955146e-05, | |
| "loss": 0.556, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.027531757680044535, | |
| "grad_norm": 8.138900756835938, | |
| "learning_rate": 4.95419808694772e-05, | |
| "loss": 0.545, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.027582367528721088, | |
| "grad_norm": 7.7400431632995605, | |
| "learning_rate": 4.954113737199926e-05, | |
| "loss": 0.6399, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.02763297737739764, | |
| "grad_norm": 18.444286346435547, | |
| "learning_rate": 4.954029387452131e-05, | |
| "loss": 0.9819, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.027683587226074193, | |
| "grad_norm": 4.818946838378906, | |
| "learning_rate": 4.953945037704337e-05, | |
| "loss": 0.5091, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.027734197074750746, | |
| "grad_norm": 6.969218730926514, | |
| "learning_rate": 4.953860687956543e-05, | |
| "loss": 0.6144, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.0277848069234273, | |
| "grad_norm": 5.701696395874023, | |
| "learning_rate": 4.9537763382087486e-05, | |
| "loss": 0.6378, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.02783541677210385, | |
| "grad_norm": 4.948043346405029, | |
| "learning_rate": 4.953691988460955e-05, | |
| "loss": 0.61, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.027886026620780404, | |
| "grad_norm": 6.133516788482666, | |
| "learning_rate": 4.95360763871316e-05, | |
| "loss": 0.5396, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.027936636469456957, | |
| "grad_norm": 4.206554889678955, | |
| "learning_rate": 4.953523288965366e-05, | |
| "loss": 0.3066, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.02798724631813351, | |
| "grad_norm": 3.7985496520996094, | |
| "learning_rate": 4.953438939217572e-05, | |
| "loss": 0.5389, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.028037856166810062, | |
| "grad_norm": 6.991200923919678, | |
| "learning_rate": 4.9533545894697774e-05, | |
| "loss": 0.7457, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.028088466015486615, | |
| "grad_norm": 5.731369495391846, | |
| "learning_rate": 4.9532702397219834e-05, | |
| "loss": 0.3976, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.028139075864163168, | |
| "grad_norm": 7.29412317276001, | |
| "learning_rate": 4.953185889974189e-05, | |
| "loss": 0.56, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.02818968571283972, | |
| "grad_norm": 10.055305480957031, | |
| "learning_rate": 4.953101540226395e-05, | |
| "loss": 0.8518, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.02824029556151627, | |
| "grad_norm": 5.1682047843933105, | |
| "learning_rate": 4.953017190478601e-05, | |
| "loss": 0.3921, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.028290905410192822, | |
| "grad_norm": 7.5293049812316895, | |
| "learning_rate": 4.952932840730806e-05, | |
| "loss": 0.732, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.028341515258869375, | |
| "grad_norm": 7.173330783843994, | |
| "learning_rate": 4.952848490983012e-05, | |
| "loss": 0.6786, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.028392125107545928, | |
| "grad_norm": 7.453824520111084, | |
| "learning_rate": 4.9527641412352175e-05, | |
| "loss": 0.6836, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.02844273495622248, | |
| "grad_norm": 16.70603370666504, | |
| "learning_rate": 4.9526797914874235e-05, | |
| "loss": 0.8831, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.028493344804899033, | |
| "grad_norm": 8.2377290725708, | |
| "learning_rate": 4.952595441739629e-05, | |
| "loss": 0.8214, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.028543954653575586, | |
| "grad_norm": 4.513237953186035, | |
| "learning_rate": 4.952511091991835e-05, | |
| "loss": 0.7878, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.02859456450225214, | |
| "grad_norm": 6.024347305297852, | |
| "learning_rate": 4.952426742244041e-05, | |
| "loss": 0.2515, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.02864517435092869, | |
| "grad_norm": 7.672776699066162, | |
| "learning_rate": 4.952342392496246e-05, | |
| "loss": 0.9578, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.028695784199605244, | |
| "grad_norm": 11.622359275817871, | |
| "learning_rate": 4.952258042748452e-05, | |
| "loss": 0.6932, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.028746394048281797, | |
| "grad_norm": 6.2994232177734375, | |
| "learning_rate": 4.9521736930006576e-05, | |
| "loss": 0.5317, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.02879700389695835, | |
| "grad_norm": 8.886155128479004, | |
| "learning_rate": 4.9520893432528636e-05, | |
| "loss": 0.9717, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.028847613745634902, | |
| "grad_norm": 3.2561750411987305, | |
| "learning_rate": 4.9520049935050696e-05, | |
| "loss": 0.3471, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.02889822359431145, | |
| "grad_norm": 6.805208683013916, | |
| "learning_rate": 4.951920643757275e-05, | |
| "loss": 0.4706, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.028948833442988004, | |
| "grad_norm": 6.713986396789551, | |
| "learning_rate": 4.951836294009481e-05, | |
| "loss": 0.7055, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.028999443291664557, | |
| "grad_norm": 5.203835964202881, | |
| "learning_rate": 4.9517519442616863e-05, | |
| "loss": 0.6585, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.02905005314034111, | |
| "grad_norm": 3.168962001800537, | |
| "learning_rate": 4.9516675945138924e-05, | |
| "loss": 0.4794, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.029100662989017662, | |
| "grad_norm": 11.134737014770508, | |
| "learning_rate": 4.9515832447660984e-05, | |
| "loss": 0.899, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.029151272837694215, | |
| "grad_norm": 5.522861480712891, | |
| "learning_rate": 4.951498895018304e-05, | |
| "loss": 0.4002, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.029201882686370768, | |
| "grad_norm": 2.908127784729004, | |
| "learning_rate": 4.95141454527051e-05, | |
| "loss": 0.4008, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.02925249253504732, | |
| "grad_norm": 11.76309871673584, | |
| "learning_rate": 4.951330195522715e-05, | |
| "loss": 0.3493, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.029303102383723873, | |
| "grad_norm": 4.505433082580566, | |
| "learning_rate": 4.951245845774921e-05, | |
| "loss": 0.3181, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.029353712232400426, | |
| "grad_norm": 4.662505149841309, | |
| "learning_rate": 4.951161496027127e-05, | |
| "loss": 0.6274, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.02940432208107698, | |
| "grad_norm": 4.870098114013672, | |
| "learning_rate": 4.9510771462793325e-05, | |
| "loss": 0.5994, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.02945493192975353, | |
| "grad_norm": 4.153433322906494, | |
| "learning_rate": 4.9509927965315385e-05, | |
| "loss": 0.3769, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.029505541778430084, | |
| "grad_norm": 8.398072242736816, | |
| "learning_rate": 4.950908446783744e-05, | |
| "loss": 0.8386, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.029556151627106633, | |
| "grad_norm": 5.86764669418335, | |
| "learning_rate": 4.95082409703595e-05, | |
| "loss": 0.3906, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.029606761475783186, | |
| "grad_norm": 4.7525200843811035, | |
| "learning_rate": 4.950739747288156e-05, | |
| "loss": 0.5805, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.02965737132445974, | |
| "grad_norm": 14.163674354553223, | |
| "learning_rate": 4.950655397540361e-05, | |
| "loss": 0.714, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.02970798117313629, | |
| "grad_norm": 6.36476469039917, | |
| "learning_rate": 4.950571047792567e-05, | |
| "loss": 0.7714, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.029758591021812844, | |
| "grad_norm": 3.8334286212921143, | |
| "learning_rate": 4.9504866980447726e-05, | |
| "loss": 0.4451, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.029809200870489397, | |
| "grad_norm": 3.1083927154541016, | |
| "learning_rate": 4.9504023482969786e-05, | |
| "loss": 0.3391, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.02985981071916595, | |
| "grad_norm": 11.239181518554688, | |
| "learning_rate": 4.9503179985491846e-05, | |
| "loss": 0.6345, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.029910420567842502, | |
| "grad_norm": 6.135115623474121, | |
| "learning_rate": 4.95023364880139e-05, | |
| "loss": 0.8366, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.029961030416519055, | |
| "grad_norm": 3.8036446571350098, | |
| "learning_rate": 4.950149299053596e-05, | |
| "loss": 0.4256, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.030011640265195608, | |
| "grad_norm": 11.396409034729004, | |
| "learning_rate": 4.950064949305801e-05, | |
| "loss": 0.5988, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.03006225011387216, | |
| "grad_norm": 6.251826286315918, | |
| "learning_rate": 4.949980599558007e-05, | |
| "loss": 0.4033, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.030112859962548713, | |
| "grad_norm": 6.115148544311523, | |
| "learning_rate": 4.9498962498102134e-05, | |
| "loss": 0.4699, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.030163469811225266, | |
| "grad_norm": 8.004677772521973, | |
| "learning_rate": 4.949811900062419e-05, | |
| "loss": 0.7723, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.03021407965990182, | |
| "grad_norm": 8.397205352783203, | |
| "learning_rate": 4.949727550314625e-05, | |
| "loss": 0.4565, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.030264689508578368, | |
| "grad_norm": 7.252643585205078, | |
| "learning_rate": 4.94964320056683e-05, | |
| "loss": 0.7132, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.03031529935725492, | |
| "grad_norm": 5.881059169769287, | |
| "learning_rate": 4.949558850819036e-05, | |
| "loss": 0.7198, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.030365909205931473, | |
| "grad_norm": 11.146910667419434, | |
| "learning_rate": 4.949474501071242e-05, | |
| "loss": 0.5668, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.030365909205931473, | |
| "eval_cer": 0.2020166573086281, | |
| "eval_loss": 0.3112943768501282, | |
| "eval_runtime": 2732.7913, | |
| "eval_samples_per_second": 5.736, | |
| "eval_steps_per_second": 0.359, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.030416519054608026, | |
| "grad_norm": 5.239530563354492, | |
| "learning_rate": 4.9493901513234474e-05, | |
| "loss": 0.8572, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.03046712890328458, | |
| "grad_norm": 6.763473987579346, | |
| "learning_rate": 4.9493058015756535e-05, | |
| "loss": 0.4804, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.03051773875196113, | |
| "grad_norm": 15.63022232055664, | |
| "learning_rate": 4.949221451827859e-05, | |
| "loss": 0.6519, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.030568348600637684, | |
| "grad_norm": 6.332169055938721, | |
| "learning_rate": 4.949137102080065e-05, | |
| "loss": 0.4457, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.030618958449314237, | |
| "grad_norm": 8.737252235412598, | |
| "learning_rate": 4.94905275233227e-05, | |
| "loss": 0.6122, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.03066956829799079, | |
| "grad_norm": 9.297114372253418, | |
| "learning_rate": 4.948968402584476e-05, | |
| "loss": 0.7155, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.030720178146667342, | |
| "grad_norm": 7.846271991729736, | |
| "learning_rate": 4.948884052836682e-05, | |
| "loss": 0.7046, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.030770787995343895, | |
| "grad_norm": 9.104436874389648, | |
| "learning_rate": 4.9487997030888875e-05, | |
| "loss": 0.5831, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.030821397844020448, | |
| "grad_norm": 4.265629768371582, | |
| "learning_rate": 4.9487153533410936e-05, | |
| "loss": 0.2498, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.030872007692697, | |
| "grad_norm": 3.7431282997131348, | |
| "learning_rate": 4.948631003593299e-05, | |
| "loss": 0.6634, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.03092261754137355, | |
| "grad_norm": 5.563877105712891, | |
| "learning_rate": 4.948546653845505e-05, | |
| "loss": 0.7814, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.030973227390050102, | |
| "grad_norm": 5.606101036071777, | |
| "learning_rate": 4.948462304097711e-05, | |
| "loss": 0.7189, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.031023837238726655, | |
| "grad_norm": 9.97654914855957, | |
| "learning_rate": 4.948377954349916e-05, | |
| "loss": 0.7406, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.031074447087403208, | |
| "grad_norm": 10.628059387207031, | |
| "learning_rate": 4.948293604602122e-05, | |
| "loss": 0.904, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.03112505693607976, | |
| "grad_norm": 2.7047297954559326, | |
| "learning_rate": 4.9482092548543277e-05, | |
| "loss": 0.3665, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.031175666784756313, | |
| "grad_norm": 8.546875953674316, | |
| "learning_rate": 4.948124905106534e-05, | |
| "loss": 0.5705, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.031226276633432866, | |
| "grad_norm": 6.664468765258789, | |
| "learning_rate": 4.94804055535874e-05, | |
| "loss": 0.518, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.03127688648210942, | |
| "grad_norm": 8.334696769714355, | |
| "learning_rate": 4.947956205610945e-05, | |
| "loss": 0.5282, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.03132749633078597, | |
| "grad_norm": 7.652597427368164, | |
| "learning_rate": 4.947871855863151e-05, | |
| "loss": 0.6678, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.031378106179462524, | |
| "grad_norm": 5.162440776824951, | |
| "learning_rate": 4.9477875061153564e-05, | |
| "loss": 0.4045, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.03142871602813908, | |
| "grad_norm": 6.496342658996582, | |
| "learning_rate": 4.9477031563675624e-05, | |
| "loss": 0.4663, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.03147932587681563, | |
| "grad_norm": 7.366580963134766, | |
| "learning_rate": 4.9476188066197684e-05, | |
| "loss": 0.6894, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.03152993572549218, | |
| "grad_norm": 13.762931823730469, | |
| "learning_rate": 4.947534456871974e-05, | |
| "loss": 0.7944, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.031580545574168735, | |
| "grad_norm": 12.880459785461426, | |
| "learning_rate": 4.94745010712418e-05, | |
| "loss": 0.7428, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.03163115542284529, | |
| "grad_norm": 6.380914211273193, | |
| "learning_rate": 4.947365757376385e-05, | |
| "loss": 0.6661, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.03168176527152184, | |
| "grad_norm": 7.456797122955322, | |
| "learning_rate": 4.947281407628591e-05, | |
| "loss": 0.7024, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.03173237512019839, | |
| "grad_norm": 4.948566913604736, | |
| "learning_rate": 4.947197057880797e-05, | |
| "loss": 0.5951, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.031782984968874946, | |
| "grad_norm": 7.60888147354126, | |
| "learning_rate": 4.9471127081330025e-05, | |
| "loss": 0.4111, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.0318335948175515, | |
| "grad_norm": 5.659879684448242, | |
| "learning_rate": 4.9470283583852085e-05, | |
| "loss": 0.6262, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.03188420466622805, | |
| "grad_norm": 6.134880542755127, | |
| "learning_rate": 4.946944008637414e-05, | |
| "loss": 0.5249, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.0319348145149046, | |
| "grad_norm": 6.531559467315674, | |
| "learning_rate": 4.94685965888962e-05, | |
| "loss": 0.5775, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.03198542436358115, | |
| "grad_norm": 12.462188720703125, | |
| "learning_rate": 4.946775309141826e-05, | |
| "loss": 0.6286, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.0320360342122577, | |
| "grad_norm": 2.809241533279419, | |
| "learning_rate": 4.946690959394031e-05, | |
| "loss": 0.3842, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.032086644060934255, | |
| "grad_norm": 5.521834850311279, | |
| "learning_rate": 4.946606609646237e-05, | |
| "loss": 0.4794, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.03213725390961081, | |
| "grad_norm": 5.101696491241455, | |
| "learning_rate": 4.9465222598984426e-05, | |
| "loss": 0.4713, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.03218786375828736, | |
| "grad_norm": 6.392932891845703, | |
| "learning_rate": 4.9464379101506486e-05, | |
| "loss": 0.6618, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.03223847360696391, | |
| "grad_norm": 4.8280768394470215, | |
| "learning_rate": 4.946353560402855e-05, | |
| "loss": 0.3516, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.032289083455640466, | |
| "grad_norm": 3.3262429237365723, | |
| "learning_rate": 4.94626921065506e-05, | |
| "loss": 0.5898, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.03233969330431702, | |
| "grad_norm": 5.624804496765137, | |
| "learning_rate": 4.946184860907266e-05, | |
| "loss": 0.5552, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.03239030315299357, | |
| "grad_norm": 3.1129558086395264, | |
| "learning_rate": 4.9461005111594714e-05, | |
| "loss": 0.6172, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.032440913001670124, | |
| "grad_norm": 3.5634653568267822, | |
| "learning_rate": 4.9460161614116774e-05, | |
| "loss": 0.3858, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.03249152285034668, | |
| "grad_norm": 7.236937046051025, | |
| "learning_rate": 4.9459318116638834e-05, | |
| "loss": 0.6032, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.03254213269902323, | |
| "grad_norm": 3.344967842102051, | |
| "learning_rate": 4.945847461916089e-05, | |
| "loss": 0.5774, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.03259274254769978, | |
| "grad_norm": 3.4330480098724365, | |
| "learning_rate": 4.945763112168295e-05, | |
| "loss": 0.7142, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.032643352396376335, | |
| "grad_norm": 5.767812728881836, | |
| "learning_rate": 4.9456787624205e-05, | |
| "loss": 0.5658, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.03269396224505289, | |
| "grad_norm": 5.187238693237305, | |
| "learning_rate": 4.945594412672707e-05, | |
| "loss": 0.346, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.03274457209372944, | |
| "grad_norm": 5.828567028045654, | |
| "learning_rate": 4.945510062924912e-05, | |
| "loss": 0.3802, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.03279518194240599, | |
| "grad_norm": 6.394260406494141, | |
| "learning_rate": 4.945425713177118e-05, | |
| "loss": 1.0168, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.032845791791082546, | |
| "grad_norm": 4.123929977416992, | |
| "learning_rate": 4.945341363429324e-05, | |
| "loss": 0.4564, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.0328964016397591, | |
| "grad_norm": 6.843530178070068, | |
| "learning_rate": 4.9452570136815295e-05, | |
| "loss": 0.6159, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.03294701148843565, | |
| "grad_norm": 10.560795783996582, | |
| "learning_rate": 4.9451726639337356e-05, | |
| "loss": 0.517, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.032997621337112204, | |
| "grad_norm": 2.8675217628479004, | |
| "learning_rate": 4.945088314185941e-05, | |
| "loss": 0.6848, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.03304823118578876, | |
| "grad_norm": 3.9155211448669434, | |
| "learning_rate": 4.945003964438147e-05, | |
| "loss": 0.6188, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.03309884103446531, | |
| "grad_norm": 6.229773998260498, | |
| "learning_rate": 4.944919614690352e-05, | |
| "loss": 0.4295, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.03314945088314186, | |
| "grad_norm": 4.002541542053223, | |
| "learning_rate": 4.944835264942558e-05, | |
| "loss": 0.5816, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.033200060731818415, | |
| "grad_norm": 6.393809795379639, | |
| "learning_rate": 4.944750915194764e-05, | |
| "loss": 0.8652, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.03325067058049497, | |
| "grad_norm": 3.2906885147094727, | |
| "learning_rate": 4.9446665654469696e-05, | |
| "loss": 0.6938, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.03330128042917151, | |
| "grad_norm": 2.870098352432251, | |
| "learning_rate": 4.944582215699176e-05, | |
| "loss": 0.2623, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.033351890277848066, | |
| "grad_norm": 5.652560710906982, | |
| "learning_rate": 4.944497865951381e-05, | |
| "loss": 0.5474, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.03340250012652462, | |
| "grad_norm": 12.31924819946289, | |
| "learning_rate": 4.944413516203587e-05, | |
| "loss": 0.7774, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.03345310997520117, | |
| "grad_norm": 4.865198135375977, | |
| "learning_rate": 4.944329166455793e-05, | |
| "loss": 0.2116, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.033503719823877724, | |
| "grad_norm": 4.536335468292236, | |
| "learning_rate": 4.9442448167079984e-05, | |
| "loss": 0.541, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.03355432967255428, | |
| "grad_norm": 14.479270935058594, | |
| "learning_rate": 4.9441604669602044e-05, | |
| "loss": 0.3704, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.03360493952123083, | |
| "grad_norm": 7.071706295013428, | |
| "learning_rate": 4.94407611721241e-05, | |
| "loss": 0.5868, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.03365554936990738, | |
| "grad_norm": 12.812637329101562, | |
| "learning_rate": 4.943991767464616e-05, | |
| "loss": 0.7385, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.033706159218583935, | |
| "grad_norm": 10.205668449401855, | |
| "learning_rate": 4.943907417716822e-05, | |
| "loss": 0.5933, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.03375676906726049, | |
| "grad_norm": 6.436938762664795, | |
| "learning_rate": 4.943823067969027e-05, | |
| "loss": 1.0516, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.03380737891593704, | |
| "grad_norm": 4.715915679931641, | |
| "learning_rate": 4.943738718221233e-05, | |
| "loss": 0.5055, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.03385798876461359, | |
| "grad_norm": 5.58711051940918, | |
| "learning_rate": 4.9436543684734385e-05, | |
| "loss": 0.4184, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.033908598613290146, | |
| "grad_norm": 16.581960678100586, | |
| "learning_rate": 4.9435700187256445e-05, | |
| "loss": 0.478, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.0339592084619667, | |
| "grad_norm": 7.326168060302734, | |
| "learning_rate": 4.9434856689778505e-05, | |
| "loss": 0.7105, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.03400981831064325, | |
| "grad_norm": 4.85835075378418, | |
| "learning_rate": 4.943401319230056e-05, | |
| "loss": 0.7862, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.034060428159319804, | |
| "grad_norm": 3.9227964878082275, | |
| "learning_rate": 4.943316969482262e-05, | |
| "loss": 0.5298, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.03411103800799636, | |
| "grad_norm": 6.2520904541015625, | |
| "learning_rate": 4.943232619734467e-05, | |
| "loss": 0.5795, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.03416164785667291, | |
| "grad_norm": 6.502024173736572, | |
| "learning_rate": 4.943148269986673e-05, | |
| "loss": 0.4504, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.03421225770534946, | |
| "grad_norm": 6.299930095672607, | |
| "learning_rate": 4.943063920238879e-05, | |
| "loss": 0.8499, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.034262867554026015, | |
| "grad_norm": 3.0065736770629883, | |
| "learning_rate": 4.9429795704910846e-05, | |
| "loss": 0.3785, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.03431347740270257, | |
| "grad_norm": 6.053264617919922, | |
| "learning_rate": 4.9428952207432906e-05, | |
| "loss": 0.4405, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.03436408725137912, | |
| "grad_norm": 2.8478128910064697, | |
| "learning_rate": 4.942810870995496e-05, | |
| "loss": 0.3576, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.03441469710005567, | |
| "grad_norm": 4.3910369873046875, | |
| "learning_rate": 4.942726521247702e-05, | |
| "loss": 0.6643, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.034465306948732226, | |
| "grad_norm": 8.032602310180664, | |
| "learning_rate": 4.942642171499908e-05, | |
| "loss": 0.3172, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.03451591679740878, | |
| "grad_norm": 4.423160552978516, | |
| "learning_rate": 4.9425578217521134e-05, | |
| "loss": 0.5107, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.03456652664608533, | |
| "grad_norm": 4.310211181640625, | |
| "learning_rate": 4.9424734720043194e-05, | |
| "loss": 0.7117, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.034617136494761884, | |
| "grad_norm": 7.594289779663086, | |
| "learning_rate": 4.942389122256525e-05, | |
| "loss": 0.3758, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.03466774634343843, | |
| "grad_norm": 10.414362907409668, | |
| "learning_rate": 4.942304772508731e-05, | |
| "loss": 0.9048, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.03471835619211498, | |
| "grad_norm": 5.809612274169922, | |
| "learning_rate": 4.942220422760937e-05, | |
| "loss": 0.4148, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.034768966040791535, | |
| "grad_norm": 7.8530449867248535, | |
| "learning_rate": 4.942136073013142e-05, | |
| "loss": 0.6252, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.03481957588946809, | |
| "grad_norm": 9.151339530944824, | |
| "learning_rate": 4.942051723265348e-05, | |
| "loss": 0.6158, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.03487018573814464, | |
| "grad_norm": 20.164710998535156, | |
| "learning_rate": 4.9419673735175535e-05, | |
| "loss": 0.6702, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.03492079558682119, | |
| "grad_norm": 6.756007194519043, | |
| "learning_rate": 4.9418830237697595e-05, | |
| "loss": 0.2371, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.034971405435497746, | |
| "grad_norm": 6.241485595703125, | |
| "learning_rate": 4.9417986740219655e-05, | |
| "loss": 0.7635, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.0350220152841743, | |
| "grad_norm": 10.091927528381348, | |
| "learning_rate": 4.941714324274171e-05, | |
| "loss": 0.8506, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.03507262513285085, | |
| "grad_norm": 7.4213080406188965, | |
| "learning_rate": 4.941629974526377e-05, | |
| "loss": 0.5124, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.035123234981527404, | |
| "grad_norm": 11.214619636535645, | |
| "learning_rate": 4.941545624778582e-05, | |
| "loss": 0.4202, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.03517384483020396, | |
| "grad_norm": 8.220820426940918, | |
| "learning_rate": 4.941461275030788e-05, | |
| "loss": 0.9393, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.03522445467888051, | |
| "grad_norm": 8.862229347229004, | |
| "learning_rate": 4.9413769252829936e-05, | |
| "loss": 0.813, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.03527506452755706, | |
| "grad_norm": 3.892012596130371, | |
| "learning_rate": 4.9412925755351996e-05, | |
| "loss": 0.4279, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.035325674376233615, | |
| "grad_norm": 13.525033950805664, | |
| "learning_rate": 4.9412082257874056e-05, | |
| "loss": 0.4542, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.03537628422491017, | |
| "grad_norm": 7.098147869110107, | |
| "learning_rate": 4.941123876039611e-05, | |
| "loss": 0.613, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.03542689407358672, | |
| "grad_norm": 6.280513286590576, | |
| "learning_rate": 4.941039526291817e-05, | |
| "loss": 0.8402, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.03542689407358672, | |
| "eval_cer": 0.19522168153555014, | |
| "eval_loss": 0.2994668483734131, | |
| "eval_runtime": 2672.9422, | |
| "eval_samples_per_second": 5.864, | |
| "eval_steps_per_second": 0.367, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.03547750392226327, | |
| "grad_norm": 20.038259506225586, | |
| "learning_rate": 4.940955176544022e-05, | |
| "loss": 0.4221, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.035528113770939826, | |
| "grad_norm": 4.178603649139404, | |
| "learning_rate": 4.940870826796228e-05, | |
| "loss": 0.5002, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.03557872361961638, | |
| "grad_norm": 4.7133469581604, | |
| "learning_rate": 4.9407864770484344e-05, | |
| "loss": 0.7616, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.03562933346829293, | |
| "grad_norm": 5.610054969787598, | |
| "learning_rate": 4.94070212730064e-05, | |
| "loss": 0.5404, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.035679943316969484, | |
| "grad_norm": 12.473624229431152, | |
| "learning_rate": 4.940617777552846e-05, | |
| "loss": 0.4731, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.03573055316564604, | |
| "grad_norm": 5.456740856170654, | |
| "learning_rate": 4.940533427805051e-05, | |
| "loss": 0.3353, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.03578116301432259, | |
| "grad_norm": 8.414679527282715, | |
| "learning_rate": 4.940449078057257e-05, | |
| "loss": 0.5136, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.03583177286299914, | |
| "grad_norm": 6.136534214019775, | |
| "learning_rate": 4.940364728309463e-05, | |
| "loss": 0.4605, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.035882382711675695, | |
| "grad_norm": 6.461178779602051, | |
| "learning_rate": 4.9402803785616684e-05, | |
| "loss": 0.395, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.03593299256035225, | |
| "grad_norm": 8.947699546813965, | |
| "learning_rate": 4.9401960288138745e-05, | |
| "loss": 0.8262, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.0359836024090288, | |
| "grad_norm": 7.892368793487549, | |
| "learning_rate": 4.94011167906608e-05, | |
| "loss": 0.529, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.036034212257705346, | |
| "grad_norm": 6.520657062530518, | |
| "learning_rate": 4.940027329318286e-05, | |
| "loss": 0.6126, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.0360848221063819, | |
| "grad_norm": 17.974655151367188, | |
| "learning_rate": 4.939942979570492e-05, | |
| "loss": 0.748, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.03613543195505845, | |
| "grad_norm": 3.9267520904541016, | |
| "learning_rate": 4.939858629822697e-05, | |
| "loss": 0.3001, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.036186041803735004, | |
| "grad_norm": 14.933813095092773, | |
| "learning_rate": 4.939774280074903e-05, | |
| "loss": 0.766, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.03623665165241156, | |
| "grad_norm": 9.554229736328125, | |
| "learning_rate": 4.9396899303271085e-05, | |
| "loss": 0.5409, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.03628726150108811, | |
| "grad_norm": 8.939488410949707, | |
| "learning_rate": 4.9396055805793146e-05, | |
| "loss": 0.4732, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.03633787134976466, | |
| "grad_norm": 3.313821315765381, | |
| "learning_rate": 4.9395212308315206e-05, | |
| "loss": 0.3792, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.036388481198441215, | |
| "grad_norm": 15.510714530944824, | |
| "learning_rate": 4.939436881083726e-05, | |
| "loss": 0.7043, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.03643909104711777, | |
| "grad_norm": 8.56005573272705, | |
| "learning_rate": 4.939352531335932e-05, | |
| "loss": 0.4035, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.03648970089579432, | |
| "grad_norm": 5.8778252601623535, | |
| "learning_rate": 4.939268181588137e-05, | |
| "loss": 0.3744, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.03654031074447087, | |
| "grad_norm": 6.815622806549072, | |
| "learning_rate": 4.939183831840343e-05, | |
| "loss": 0.5826, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.036590920593147426, | |
| "grad_norm": 7.3577680587768555, | |
| "learning_rate": 4.939099482092549e-05, | |
| "loss": 0.505, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.03664153044182398, | |
| "grad_norm": 8.071341514587402, | |
| "learning_rate": 4.939015132344755e-05, | |
| "loss": 0.7999, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.03669214029050053, | |
| "grad_norm": 10.3243989944458, | |
| "learning_rate": 4.938930782596961e-05, | |
| "loss": 0.6247, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.036742750139177084, | |
| "grad_norm": 4.544474124908447, | |
| "learning_rate": 4.938846432849166e-05, | |
| "loss": 0.5807, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.03679335998785364, | |
| "grad_norm": 9.284850120544434, | |
| "learning_rate": 4.938762083101372e-05, | |
| "loss": 0.4313, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.03684396983653019, | |
| "grad_norm": 7.1159749031066895, | |
| "learning_rate": 4.938677733353578e-05, | |
| "loss": 0.4875, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.03689457968520674, | |
| "grad_norm": 7.782135009765625, | |
| "learning_rate": 4.9385933836057834e-05, | |
| "loss": 0.8014, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.036945189533883295, | |
| "grad_norm": 9.787236213684082, | |
| "learning_rate": 4.9385090338579894e-05, | |
| "loss": 0.9414, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.03699579938255985, | |
| "grad_norm": 5.5786848068237305, | |
| "learning_rate": 4.938424684110195e-05, | |
| "loss": 0.6361, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.0370464092312364, | |
| "grad_norm": 4.597846508026123, | |
| "learning_rate": 4.938340334362401e-05, | |
| "loss": 0.2795, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.03709701907991295, | |
| "grad_norm": 5.9186553955078125, | |
| "learning_rate": 4.938255984614607e-05, | |
| "loss": 0.4051, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.037147628928589506, | |
| "grad_norm": 5.021423816680908, | |
| "learning_rate": 4.938171634866812e-05, | |
| "loss": 0.4298, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.03719823877726606, | |
| "grad_norm": 3.111738681793213, | |
| "learning_rate": 4.938087285119018e-05, | |
| "loss": 0.3101, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.03724884862594261, | |
| "grad_norm": 6.111041069030762, | |
| "learning_rate": 4.9380029353712235e-05, | |
| "loss": 0.6118, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.037299458474619164, | |
| "grad_norm": 30.16288185119629, | |
| "learning_rate": 4.9379185856234295e-05, | |
| "loss": 0.6206, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.03735006832329572, | |
| "grad_norm": 9.460359573364258, | |
| "learning_rate": 4.937834235875635e-05, | |
| "loss": 0.3322, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.03740067817197226, | |
| "grad_norm": 7.554260730743408, | |
| "learning_rate": 4.937749886127841e-05, | |
| "loss": 0.5655, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.037451288020648815, | |
| "grad_norm": 8.392956733703613, | |
| "learning_rate": 4.937665536380047e-05, | |
| "loss": 0.6669, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.03750189786932537, | |
| "grad_norm": 7.463123321533203, | |
| "learning_rate": 4.937581186632252e-05, | |
| "loss": 0.6082, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.03755250771800192, | |
| "grad_norm": 2.9462671279907227, | |
| "learning_rate": 4.937496836884458e-05, | |
| "loss": 0.7547, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.03760311756667847, | |
| "grad_norm": 9.454707145690918, | |
| "learning_rate": 4.9374124871366636e-05, | |
| "loss": 0.5665, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.037653727415355026, | |
| "grad_norm": 2.5308687686920166, | |
| "learning_rate": 4.9373281373888696e-05, | |
| "loss": 0.4683, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.03770433726403158, | |
| "grad_norm": 3.8949882984161377, | |
| "learning_rate": 4.937243787641076e-05, | |
| "loss": 0.3981, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.03775494711270813, | |
| "grad_norm": 3.255460739135742, | |
| "learning_rate": 4.937159437893281e-05, | |
| "loss": 0.3254, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.037805556961384684, | |
| "grad_norm": 10.862367630004883, | |
| "learning_rate": 4.937075088145487e-05, | |
| "loss": 0.8337, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.03785616681006124, | |
| "grad_norm": 12.926139831542969, | |
| "learning_rate": 4.9369907383976924e-05, | |
| "loss": 0.8218, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.03790677665873779, | |
| "grad_norm": 5.084683418273926, | |
| "learning_rate": 4.9369063886498984e-05, | |
| "loss": 0.4073, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.03795738650741434, | |
| "grad_norm": 5.375326633453369, | |
| "learning_rate": 4.9368220389021044e-05, | |
| "loss": 0.4468, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.038007996356090895, | |
| "grad_norm": 2.891730785369873, | |
| "learning_rate": 4.93673768915431e-05, | |
| "loss": 0.4952, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.03805860620476745, | |
| "grad_norm": 6.7384161949157715, | |
| "learning_rate": 4.936653339406516e-05, | |
| "loss": 0.6817, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.038109216053444, | |
| "grad_norm": 4.95111608505249, | |
| "learning_rate": 4.936568989658721e-05, | |
| "loss": 0.7322, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.03815982590212055, | |
| "grad_norm": 4.689101219177246, | |
| "learning_rate": 4.936484639910927e-05, | |
| "loss": 0.6041, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.038210435750797106, | |
| "grad_norm": 6.9700798988342285, | |
| "learning_rate": 4.936400290163133e-05, | |
| "loss": 0.4521, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.03826104559947366, | |
| "grad_norm": 5.602224826812744, | |
| "learning_rate": 4.9363159404153385e-05, | |
| "loss": 0.6519, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.03831165544815021, | |
| "grad_norm": 5.881453037261963, | |
| "learning_rate": 4.9362315906675445e-05, | |
| "loss": 0.6069, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.038362265296826764, | |
| "grad_norm": 5.395936012268066, | |
| "learning_rate": 4.93614724091975e-05, | |
| "loss": 0.3001, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.03841287514550332, | |
| "grad_norm": 5.4586663246154785, | |
| "learning_rate": 4.936062891171956e-05, | |
| "loss": 0.6286, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.03846348499417987, | |
| "grad_norm": 5.338792324066162, | |
| "learning_rate": 4.935978541424162e-05, | |
| "loss": 0.4526, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.03851409484285642, | |
| "grad_norm": 2.985135555267334, | |
| "learning_rate": 4.935894191676367e-05, | |
| "loss": 0.2444, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.038564704691532975, | |
| "grad_norm": 7.952456474304199, | |
| "learning_rate": 4.935809841928573e-05, | |
| "loss": 0.5033, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.03861531454020953, | |
| "grad_norm": 7.913976192474365, | |
| "learning_rate": 4.9357254921807786e-05, | |
| "loss": 0.5823, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.03866592438888608, | |
| "grad_norm": 7.781038284301758, | |
| "learning_rate": 4.9356411424329846e-05, | |
| "loss": 0.6915, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.03871653423756263, | |
| "grad_norm": 5.074882984161377, | |
| "learning_rate": 4.9355567926851906e-05, | |
| "loss": 0.8664, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.03876714408623918, | |
| "grad_norm": 3.08144474029541, | |
| "learning_rate": 4.935472442937396e-05, | |
| "loss": 0.3723, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.03881775393491573, | |
| "grad_norm": 6.625777244567871, | |
| "learning_rate": 4.935388093189602e-05, | |
| "loss": 0.5908, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.038868363783592284, | |
| "grad_norm": 8.451767921447754, | |
| "learning_rate": 4.9353037434418073e-05, | |
| "loss": 0.8605, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.03891897363226884, | |
| "grad_norm": 5.070049285888672, | |
| "learning_rate": 4.9352193936940134e-05, | |
| "loss": 0.3042, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.03896958348094539, | |
| "grad_norm": 7.37528133392334, | |
| "learning_rate": 4.9351350439462194e-05, | |
| "loss": 0.7376, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.03902019332962194, | |
| "grad_norm": 3.3460285663604736, | |
| "learning_rate": 4.935050694198425e-05, | |
| "loss": 0.6662, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.039070803178298495, | |
| "grad_norm": 3.999098062515259, | |
| "learning_rate": 4.934966344450631e-05, | |
| "loss": 0.3612, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.03912141302697505, | |
| "grad_norm": 6.4491868019104, | |
| "learning_rate": 4.934881994702836e-05, | |
| "loss": 0.7708, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.0391720228756516, | |
| "grad_norm": 4.037872314453125, | |
| "learning_rate": 4.934797644955042e-05, | |
| "loss": 0.4529, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.03922263272432815, | |
| "grad_norm": 7.757660865783691, | |
| "learning_rate": 4.9347132952072475e-05, | |
| "loss": 0.6572, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.039273242573004706, | |
| "grad_norm": 10.399569511413574, | |
| "learning_rate": 4.9346289454594535e-05, | |
| "loss": 0.971, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.03932385242168126, | |
| "grad_norm": 4.098683834075928, | |
| "learning_rate": 4.9345445957116595e-05, | |
| "loss": 0.5687, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.03937446227035781, | |
| "grad_norm": 4.029207706451416, | |
| "learning_rate": 4.934460245963865e-05, | |
| "loss": 0.4164, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.039425072119034364, | |
| "grad_norm": 9.3286771774292, | |
| "learning_rate": 4.934375896216071e-05, | |
| "loss": 0.8485, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.03947568196771092, | |
| "grad_norm": 8.822758674621582, | |
| "learning_rate": 4.934291546468276e-05, | |
| "loss": 0.5747, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.03952629181638747, | |
| "grad_norm": 6.740304946899414, | |
| "learning_rate": 4.934207196720482e-05, | |
| "loss": 0.5014, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.03957690166506402, | |
| "grad_norm": 4.9694929122924805, | |
| "learning_rate": 4.934122846972688e-05, | |
| "loss": 0.5403, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.039627511513740575, | |
| "grad_norm": 7.032242298126221, | |
| "learning_rate": 4.9340384972248936e-05, | |
| "loss": 0.3896, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.03967812136241713, | |
| "grad_norm": 3.1985256671905518, | |
| "learning_rate": 4.9339541474770996e-05, | |
| "loss": 0.4138, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.03972873121109368, | |
| "grad_norm": 4.911308288574219, | |
| "learning_rate": 4.933869797729305e-05, | |
| "loss": 0.6613, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.03977934105977023, | |
| "grad_norm": 10.836835861206055, | |
| "learning_rate": 4.933785447981511e-05, | |
| "loss": 0.368, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.039829950908446786, | |
| "grad_norm": 7.5843377113342285, | |
| "learning_rate": 4.933701098233717e-05, | |
| "loss": 0.8429, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.03988056075712334, | |
| "grad_norm": 4.915452003479004, | |
| "learning_rate": 4.933616748485922e-05, | |
| "loss": 0.4179, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.03993117060579989, | |
| "grad_norm": 4.324033260345459, | |
| "learning_rate": 4.9335323987381283e-05, | |
| "loss": 0.5588, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.039981780454476444, | |
| "grad_norm": 5.227353096008301, | |
| "learning_rate": 4.933448048990334e-05, | |
| "loss": 0.4188, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.040032390303153, | |
| "grad_norm": 6.144440650939941, | |
| "learning_rate": 4.93336369924254e-05, | |
| "loss": 0.7091, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.04008300015182954, | |
| "grad_norm": 9.614253044128418, | |
| "learning_rate": 4.933279349494746e-05, | |
| "loss": 0.5816, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.040133610000506095, | |
| "grad_norm": 5.539832592010498, | |
| "learning_rate": 4.933194999746951e-05, | |
| "loss": 0.2897, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.04018421984918265, | |
| "grad_norm": 4.285924911499023, | |
| "learning_rate": 4.933110649999157e-05, | |
| "loss": 0.4647, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.0402348296978592, | |
| "grad_norm": 5.418667316436768, | |
| "learning_rate": 4.9330263002513624e-05, | |
| "loss": 0.377, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.04028543954653575, | |
| "grad_norm": 7.248344421386719, | |
| "learning_rate": 4.9329419505035684e-05, | |
| "loss": 0.5599, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.040336049395212306, | |
| "grad_norm": 7.9554643630981445, | |
| "learning_rate": 4.9328576007557745e-05, | |
| "loss": 0.8153, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.04038665924388886, | |
| "grad_norm": 8.028340339660645, | |
| "learning_rate": 4.93277325100798e-05, | |
| "loss": 0.5364, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.04043726909256541, | |
| "grad_norm": 7.2708964347839355, | |
| "learning_rate": 4.932688901260186e-05, | |
| "loss": 0.8303, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.040487878941241964, | |
| "grad_norm": 5.855560779571533, | |
| "learning_rate": 4.932604551512391e-05, | |
| "loss": 0.4462, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.040487878941241964, | |
| "eval_cer": 0.18224000249547695, | |
| "eval_loss": 0.28801780939102173, | |
| "eval_runtime": 2605.2542, | |
| "eval_samples_per_second": 6.016, | |
| "eval_steps_per_second": 0.376, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.04053848878991852, | |
| "grad_norm": 10.296344757080078, | |
| "learning_rate": 4.932520201764597e-05, | |
| "loss": 0.5008, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.04058909863859507, | |
| "grad_norm": 8.924449920654297, | |
| "learning_rate": 4.932435852016803e-05, | |
| "loss": 0.3729, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.04063970848727162, | |
| "grad_norm": 5.110000133514404, | |
| "learning_rate": 4.9323515022690085e-05, | |
| "loss": 0.5821, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.040690318335948175, | |
| "grad_norm": 5.40463399887085, | |
| "learning_rate": 4.9322671525212146e-05, | |
| "loss": 0.6681, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.04074092818462473, | |
| "grad_norm": 10.994048118591309, | |
| "learning_rate": 4.93218280277342e-05, | |
| "loss": 0.453, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.04079153803330128, | |
| "grad_norm": 6.396229267120361, | |
| "learning_rate": 4.932098453025626e-05, | |
| "loss": 0.4864, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.04084214788197783, | |
| "grad_norm": 6.884303569793701, | |
| "learning_rate": 4.932014103277832e-05, | |
| "loss": 0.9021, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.040892757730654386, | |
| "grad_norm": 15.925917625427246, | |
| "learning_rate": 4.931929753530037e-05, | |
| "loss": 0.6345, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.04094336757933094, | |
| "grad_norm": 3.3225505352020264, | |
| "learning_rate": 4.931845403782243e-05, | |
| "loss": 0.5429, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.04099397742800749, | |
| "grad_norm": 5.548943519592285, | |
| "learning_rate": 4.9317610540344487e-05, | |
| "loss": 0.5081, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.041044587276684044, | |
| "grad_norm": 8.253005027770996, | |
| "learning_rate": 4.931676704286655e-05, | |
| "loss": 0.4432, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.0410951971253606, | |
| "grad_norm": 10.242237091064453, | |
| "learning_rate": 4.931592354538861e-05, | |
| "loss": 0.6442, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.04114580697403715, | |
| "grad_norm": 4.957180976867676, | |
| "learning_rate": 4.931508004791066e-05, | |
| "loss": 0.4146, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.0411964168227137, | |
| "grad_norm": 7.067080497741699, | |
| "learning_rate": 4.931423655043272e-05, | |
| "loss": 0.424, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.041247026671390255, | |
| "grad_norm": 5.9971842765808105, | |
| "learning_rate": 4.9313393052954774e-05, | |
| "loss": 0.4941, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.04129763652006681, | |
| "grad_norm": 5.412482261657715, | |
| "learning_rate": 4.9312549555476834e-05, | |
| "loss": 0.4065, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.04134824636874336, | |
| "grad_norm": 13.345954895019531, | |
| "learning_rate": 4.931170605799889e-05, | |
| "loss": 0.8215, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.04139885621741991, | |
| "grad_norm": 9.993549346923828, | |
| "learning_rate": 4.931086256052095e-05, | |
| "loss": 0.8707, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.04144946606609646, | |
| "grad_norm": 27.219308853149414, | |
| "learning_rate": 4.931001906304301e-05, | |
| "loss": 0.5455, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.04150007591477301, | |
| "grad_norm": 5.13199520111084, | |
| "learning_rate": 4.930917556556506e-05, | |
| "loss": 0.5479, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.041550685763449564, | |
| "grad_norm": 4.877132892608643, | |
| "learning_rate": 4.930833206808712e-05, | |
| "loss": 0.5966, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.04160129561212612, | |
| "grad_norm": 3.1169495582580566, | |
| "learning_rate": 4.9307488570609175e-05, | |
| "loss": 0.5371, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.04165190546080267, | |
| "grad_norm": 4.428645133972168, | |
| "learning_rate": 4.9306645073131235e-05, | |
| "loss": 0.3974, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.04170251530947922, | |
| "grad_norm": 12.36322021484375, | |
| "learning_rate": 4.9305801575653295e-05, | |
| "loss": 0.6203, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.041753125158155775, | |
| "grad_norm": 4.944615840911865, | |
| "learning_rate": 4.930495807817535e-05, | |
| "loss": 0.6994, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.04180373500683233, | |
| "grad_norm": 3.7057061195373535, | |
| "learning_rate": 4.930411458069741e-05, | |
| "loss": 0.2836, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.04185434485550888, | |
| "grad_norm": 5.559707164764404, | |
| "learning_rate": 4.930327108321946e-05, | |
| "loss": 0.7595, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.04190495470418543, | |
| "grad_norm": 6.805633544921875, | |
| "learning_rate": 4.930242758574152e-05, | |
| "loss": 0.497, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.041955564552861986, | |
| "grad_norm": 6.29421329498291, | |
| "learning_rate": 4.930158408826358e-05, | |
| "loss": 0.5615, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.04200617440153854, | |
| "grad_norm": 3.3329832553863525, | |
| "learning_rate": 4.9300740590785636e-05, | |
| "loss": 0.2914, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.04205678425021509, | |
| "grad_norm": 5.0592827796936035, | |
| "learning_rate": 4.9299897093307696e-05, | |
| "loss": 0.3799, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.042107394098891644, | |
| "grad_norm": 4.176607131958008, | |
| "learning_rate": 4.929905359582975e-05, | |
| "loss": 0.3987, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.0421580039475682, | |
| "grad_norm": 6.9350175857543945, | |
| "learning_rate": 4.929821009835181e-05, | |
| "loss": 0.337, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.04220861379624475, | |
| "grad_norm": 12.642129898071289, | |
| "learning_rate": 4.929736660087387e-05, | |
| "loss": 0.5035, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.0422592236449213, | |
| "grad_norm": 3.18143892288208, | |
| "learning_rate": 4.9296523103395924e-05, | |
| "loss": 0.2602, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.042309833493597855, | |
| "grad_norm": 7.448511600494385, | |
| "learning_rate": 4.9295679605917984e-05, | |
| "loss": 0.6351, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.04236044334227441, | |
| "grad_norm": 9.632100105285645, | |
| "learning_rate": 4.929483610844004e-05, | |
| "loss": 0.6639, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.04241105319095096, | |
| "grad_norm": 4.993129730224609, | |
| "learning_rate": 4.92939926109621e-05, | |
| "loss": 0.5902, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.04246166303962751, | |
| "grad_norm": 8.809192657470703, | |
| "learning_rate": 4.929314911348416e-05, | |
| "loss": 0.7136, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.042512272888304066, | |
| "grad_norm": 4.831273078918457, | |
| "learning_rate": 4.929230561600621e-05, | |
| "loss": 0.702, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.04256288273698062, | |
| "grad_norm": 3.406562089920044, | |
| "learning_rate": 4.929146211852827e-05, | |
| "loss": 0.7114, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.04261349258565717, | |
| "grad_norm": 4.17376184463501, | |
| "learning_rate": 4.9290618621050325e-05, | |
| "loss": 0.5236, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.042664102434333724, | |
| "grad_norm": 3.0841195583343506, | |
| "learning_rate": 4.9289775123572385e-05, | |
| "loss": 0.4595, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.04271471228301028, | |
| "grad_norm": 6.880139350891113, | |
| "learning_rate": 4.9288931626094445e-05, | |
| "loss": 0.6339, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.04276532213168683, | |
| "grad_norm": 3.7621781826019287, | |
| "learning_rate": 4.92880881286165e-05, | |
| "loss": 0.4469, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.042815931980363375, | |
| "grad_norm": 9.83583927154541, | |
| "learning_rate": 4.928724463113856e-05, | |
| "loss": 0.7447, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.04286654182903993, | |
| "grad_norm": 3.6700632572174072, | |
| "learning_rate": 4.928640113366061e-05, | |
| "loss": 0.3185, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.04291715167771648, | |
| "grad_norm": 5.93882417678833, | |
| "learning_rate": 4.928555763618267e-05, | |
| "loss": 0.5691, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.042967761526393033, | |
| "grad_norm": 2.9145689010620117, | |
| "learning_rate": 4.928471413870473e-05, | |
| "loss": 0.3396, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.043018371375069586, | |
| "grad_norm": 4.243858814239502, | |
| "learning_rate": 4.9283870641226786e-05, | |
| "loss": 0.3018, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.04306898122374614, | |
| "grad_norm": 4.957646369934082, | |
| "learning_rate": 4.9283027143748846e-05, | |
| "loss": 0.4555, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.04311959107242269, | |
| "grad_norm": 5.5672831535339355, | |
| "learning_rate": 4.92821836462709e-05, | |
| "loss": 0.4606, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.043170200921099244, | |
| "grad_norm": 4.047713756561279, | |
| "learning_rate": 4.928134014879296e-05, | |
| "loss": 0.3793, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.0432208107697758, | |
| "grad_norm": 7.607228755950928, | |
| "learning_rate": 4.928049665131502e-05, | |
| "loss": 0.6637, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.04327142061845235, | |
| "grad_norm": 11.504854202270508, | |
| "learning_rate": 4.9279653153837073e-05, | |
| "loss": 0.6826, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.0433220304671289, | |
| "grad_norm": 5.851675510406494, | |
| "learning_rate": 4.9278809656359134e-05, | |
| "loss": 0.526, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.043372640315805455, | |
| "grad_norm": 14.779943466186523, | |
| "learning_rate": 4.927796615888119e-05, | |
| "loss": 0.6321, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.04342325016448201, | |
| "grad_norm": 9.94005012512207, | |
| "learning_rate": 4.927712266140325e-05, | |
| "loss": 0.7401, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.04347386001315856, | |
| "grad_norm": 4.635720729827881, | |
| "learning_rate": 4.92762791639253e-05, | |
| "loss": 0.5283, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.04352446986183511, | |
| "grad_norm": 3.568844795227051, | |
| "learning_rate": 4.927543566644736e-05, | |
| "loss": 0.4053, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.043575079710511666, | |
| "grad_norm": 6.748814582824707, | |
| "learning_rate": 4.927459216896942e-05, | |
| "loss": 0.5624, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.04362568955918822, | |
| "grad_norm": 7.1615424156188965, | |
| "learning_rate": 4.9273748671491475e-05, | |
| "loss": 0.8786, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.04367629940786477, | |
| "grad_norm": 5.897001266479492, | |
| "learning_rate": 4.9272905174013535e-05, | |
| "loss": 0.7447, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.043726909256541324, | |
| "grad_norm": 6.555644512176514, | |
| "learning_rate": 4.927206167653559e-05, | |
| "loss": 0.4951, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.04377751910521788, | |
| "grad_norm": 8.615190505981445, | |
| "learning_rate": 4.927121817905765e-05, | |
| "loss": 0.6687, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.04382812895389443, | |
| "grad_norm": 6.572555065155029, | |
| "learning_rate": 4.927037468157971e-05, | |
| "loss": 0.4269, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.04387873880257098, | |
| "grad_norm": 6.60528564453125, | |
| "learning_rate": 4.926953118410176e-05, | |
| "loss": 0.4845, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.043929348651247535, | |
| "grad_norm": 5.96574592590332, | |
| "learning_rate": 4.926868768662382e-05, | |
| "loss": 0.6266, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.04397995849992409, | |
| "grad_norm": 2.911107063293457, | |
| "learning_rate": 4.9267844189145876e-05, | |
| "loss": 0.312, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.04403056834860064, | |
| "grad_norm": 6.024972438812256, | |
| "learning_rate": 4.9267000691667936e-05, | |
| "loss": 0.5205, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.04408117819727719, | |
| "grad_norm": 12.015813827514648, | |
| "learning_rate": 4.9266157194189996e-05, | |
| "loss": 0.621, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.044131788045953746, | |
| "grad_norm": 17.426437377929688, | |
| "learning_rate": 4.926531369671205e-05, | |
| "loss": 0.7822, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.04418239789463029, | |
| "grad_norm": 8.361766815185547, | |
| "learning_rate": 4.926447019923411e-05, | |
| "loss": 0.8237, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.044233007743306844, | |
| "grad_norm": 6.699423789978027, | |
| "learning_rate": 4.926362670175616e-05, | |
| "loss": 0.5673, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.0442836175919834, | |
| "grad_norm": 3.933328151702881, | |
| "learning_rate": 4.926278320427822e-05, | |
| "loss": 0.6946, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.04433422744065995, | |
| "grad_norm": 5.092313766479492, | |
| "learning_rate": 4.9261939706800283e-05, | |
| "loss": 0.6024, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.0443848372893365, | |
| "grad_norm": 16.1398868560791, | |
| "learning_rate": 4.926109620932234e-05, | |
| "loss": 0.7255, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.044435447138013055, | |
| "grad_norm": 4.958022117614746, | |
| "learning_rate": 4.92602527118444e-05, | |
| "loss": 0.4381, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.04448605698668961, | |
| "grad_norm": 6.70586633682251, | |
| "learning_rate": 4.925940921436645e-05, | |
| "loss": 0.5228, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.04453666683536616, | |
| "grad_norm": 3.8433001041412354, | |
| "learning_rate": 4.925856571688851e-05, | |
| "loss": 0.3635, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.04458727668404271, | |
| "grad_norm": 3.4007515907287598, | |
| "learning_rate": 4.925772221941057e-05, | |
| "loss": 0.2397, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.044637886532719266, | |
| "grad_norm": 12.362958908081055, | |
| "learning_rate": 4.9256878721932624e-05, | |
| "loss": 0.5092, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.04468849638139582, | |
| "grad_norm": 3.942413568496704, | |
| "learning_rate": 4.9256035224454684e-05, | |
| "loss": 0.4432, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.04473910623007237, | |
| "grad_norm": 5.850658893585205, | |
| "learning_rate": 4.925519172697674e-05, | |
| "loss": 0.8487, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.044789716078748924, | |
| "grad_norm": 3.8962783813476562, | |
| "learning_rate": 4.92543482294988e-05, | |
| "loss": 0.6375, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.04484032592742548, | |
| "grad_norm": 10.159829139709473, | |
| "learning_rate": 4.925350473202086e-05, | |
| "loss": 0.5135, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.04489093577610203, | |
| "grad_norm": 2.924187183380127, | |
| "learning_rate": 4.925266123454291e-05, | |
| "loss": 0.2508, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.04494154562477858, | |
| "grad_norm": 6.0558342933654785, | |
| "learning_rate": 4.925181773706497e-05, | |
| "loss": 0.3182, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.044992155473455135, | |
| "grad_norm": 5.556451320648193, | |
| "learning_rate": 4.9250974239587025e-05, | |
| "loss": 0.492, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.04504276532213169, | |
| "grad_norm": 4.185290813446045, | |
| "learning_rate": 4.9250130742109086e-05, | |
| "loss": 0.4214, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.04509337517080824, | |
| "grad_norm": 5.0443196296691895, | |
| "learning_rate": 4.9249287244631146e-05, | |
| "loss": 0.6803, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.04514398501948479, | |
| "grad_norm": 10.363567352294922, | |
| "learning_rate": 4.92484437471532e-05, | |
| "loss": 0.5701, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.045194594868161346, | |
| "grad_norm": 6.5281243324279785, | |
| "learning_rate": 4.924760024967526e-05, | |
| "loss": 0.417, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.0452452047168379, | |
| "grad_norm": 9.699246406555176, | |
| "learning_rate": 4.924675675219731e-05, | |
| "loss": 0.7565, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.04529581456551445, | |
| "grad_norm": 13.578716278076172, | |
| "learning_rate": 4.924591325471937e-05, | |
| "loss": 0.5528, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.045346424414191004, | |
| "grad_norm": 6.151464462280273, | |
| "learning_rate": 4.924506975724143e-05, | |
| "loss": 0.655, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.04539703426286756, | |
| "grad_norm": 11.576047897338867, | |
| "learning_rate": 4.9244226259763487e-05, | |
| "loss": 0.8649, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.04544764411154411, | |
| "grad_norm": 6.639153003692627, | |
| "learning_rate": 4.924338276228555e-05, | |
| "loss": 0.553, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.04549825396022066, | |
| "grad_norm": 4.5320563316345215, | |
| "learning_rate": 4.92425392648076e-05, | |
| "loss": 0.4832, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.04554886380889721, | |
| "grad_norm": 3.708400249481201, | |
| "learning_rate": 4.924169576732966e-05, | |
| "loss": 0.5646, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.04554886380889721, | |
| "eval_cer": 0.19456141992638343, | |
| "eval_loss": 0.29710084199905396, | |
| "eval_runtime": 2630.6631, | |
| "eval_samples_per_second": 5.958, | |
| "eval_steps_per_second": 0.373, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.04559947365757376, | |
| "grad_norm": 4.805381774902344, | |
| "learning_rate": 4.9240852269851714e-05, | |
| "loss": 0.5711, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.045650083506250314, | |
| "grad_norm": 3.130221366882324, | |
| "learning_rate": 4.9240008772373774e-05, | |
| "loss": 0.4231, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.045700693354926866, | |
| "grad_norm": 4.455588340759277, | |
| "learning_rate": 4.9239165274895834e-05, | |
| "loss": 0.3281, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.04575130320360342, | |
| "grad_norm": 2.951422929763794, | |
| "learning_rate": 4.923832177741789e-05, | |
| "loss": 0.4477, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.04580191305227997, | |
| "grad_norm": 5.397090435028076, | |
| "learning_rate": 4.923747827993995e-05, | |
| "loss": 0.4032, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.045852522900956524, | |
| "grad_norm": 5.767248630523682, | |
| "learning_rate": 4.9236634782462e-05, | |
| "loss": 0.6842, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.04590313274963308, | |
| "grad_norm": 3.4613723754882812, | |
| "learning_rate": 4.923579128498406e-05, | |
| "loss": 0.2943, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.04595374259830963, | |
| "grad_norm": 4.715557098388672, | |
| "learning_rate": 4.923494778750612e-05, | |
| "loss": 0.3851, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.04600435244698618, | |
| "grad_norm": 4.709720611572266, | |
| "learning_rate": 4.9234104290028175e-05, | |
| "loss": 0.2846, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.046054962295662735, | |
| "grad_norm": 2.6926629543304443, | |
| "learning_rate": 4.9233260792550235e-05, | |
| "loss": 0.474, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.04610557214433929, | |
| "grad_norm": 7.4501752853393555, | |
| "learning_rate": 4.923241729507229e-05, | |
| "loss": 0.6805, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.04615618199301584, | |
| "grad_norm": 4.941989421844482, | |
| "learning_rate": 4.923157379759435e-05, | |
| "loss": 0.4549, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.04620679184169239, | |
| "grad_norm": 7.671848297119141, | |
| "learning_rate": 4.923073030011641e-05, | |
| "loss": 0.4181, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.046257401690368946, | |
| "grad_norm": 1.92340886592865, | |
| "learning_rate": 4.922988680263846e-05, | |
| "loss": 0.2794, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.0463080115390455, | |
| "grad_norm": 4.5162200927734375, | |
| "learning_rate": 4.922904330516052e-05, | |
| "loss": 0.4803, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.04635862138772205, | |
| "grad_norm": 6.522688388824463, | |
| "learning_rate": 4.9228199807682576e-05, | |
| "loss": 0.5851, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.046409231236398604, | |
| "grad_norm": 3.49273419380188, | |
| "learning_rate": 4.9227356310204636e-05, | |
| "loss": 0.5751, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.04645984108507516, | |
| "grad_norm": 14.866829872131348, | |
| "learning_rate": 4.9226512812726697e-05, | |
| "loss": 0.5211, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.04651045093375171, | |
| "grad_norm": 6.658441066741943, | |
| "learning_rate": 4.922566931524875e-05, | |
| "loss": 0.5325, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.04656106078242826, | |
| "grad_norm": 3.8592264652252197, | |
| "learning_rate": 4.922482581777081e-05, | |
| "loss": 0.5917, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.046611670631104815, | |
| "grad_norm": 10.734745025634766, | |
| "learning_rate": 4.9223982320292864e-05, | |
| "loss": 0.3665, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.04666228047978137, | |
| "grad_norm": 7.1689534187316895, | |
| "learning_rate": 4.9223138822814924e-05, | |
| "loss": 0.386, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.04671289032845792, | |
| "grad_norm": 4.963681697845459, | |
| "learning_rate": 4.9222295325336984e-05, | |
| "loss": 0.4437, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.04676350017713447, | |
| "grad_norm": 2.5440425872802734, | |
| "learning_rate": 4.922145182785904e-05, | |
| "loss": 0.3268, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.046814110025811026, | |
| "grad_norm": 14.03139877319336, | |
| "learning_rate": 4.92206083303811e-05, | |
| "loss": 0.7161, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.04686471987448758, | |
| "grad_norm": 7.766716480255127, | |
| "learning_rate": 4.921976483290315e-05, | |
| "loss": 0.4935, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.046915329723164124, | |
| "grad_norm": 13.12921142578125, | |
| "learning_rate": 4.921892133542521e-05, | |
| "loss": 0.4105, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.04696593957184068, | |
| "grad_norm": 3.441727876663208, | |
| "learning_rate": 4.921807783794727e-05, | |
| "loss": 0.8493, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.04701654942051723, | |
| "grad_norm": 6.923675060272217, | |
| "learning_rate": 4.9217234340469325e-05, | |
| "loss": 0.819, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.04706715926919378, | |
| "grad_norm": 8.306994438171387, | |
| "learning_rate": 4.9216390842991385e-05, | |
| "loss": 0.6548, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.047117769117870335, | |
| "grad_norm": 3.314638614654541, | |
| "learning_rate": 4.921554734551344e-05, | |
| "loss": 0.6389, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.04716837896654689, | |
| "grad_norm": 8.720133781433105, | |
| "learning_rate": 4.92147038480355e-05, | |
| "loss": 0.5129, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.04721898881522344, | |
| "grad_norm": 15.914939880371094, | |
| "learning_rate": 4.921386035055756e-05, | |
| "loss": 0.5298, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.047269598663899993, | |
| "grad_norm": 4.567658424377441, | |
| "learning_rate": 4.921301685307961e-05, | |
| "loss": 0.426, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.047320208512576546, | |
| "grad_norm": 2.2042078971862793, | |
| "learning_rate": 4.921217335560167e-05, | |
| "loss": 0.2168, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.0473708183612531, | |
| "grad_norm": 2.475092649459839, | |
| "learning_rate": 4.9211329858123726e-05, | |
| "loss": 0.5226, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.04742142820992965, | |
| "grad_norm": 3.972266435623169, | |
| "learning_rate": 4.9210486360645786e-05, | |
| "loss": 0.5745, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.047472038058606204, | |
| "grad_norm": 4.532939910888672, | |
| "learning_rate": 4.9209642863167846e-05, | |
| "loss": 0.6833, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.04752264790728276, | |
| "grad_norm": 7.014007568359375, | |
| "learning_rate": 4.92087993656899e-05, | |
| "loss": 0.5458, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.04757325775595931, | |
| "grad_norm": 8.639058113098145, | |
| "learning_rate": 4.920795586821196e-05, | |
| "loss": 0.698, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.04762386760463586, | |
| "grad_norm": 4.487074375152588, | |
| "learning_rate": 4.920711237073401e-05, | |
| "loss": 0.4481, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.047674477453312415, | |
| "grad_norm": 7.813543319702148, | |
| "learning_rate": 4.9206268873256074e-05, | |
| "loss": 0.5677, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.04772508730198897, | |
| "grad_norm": 14.59656047821045, | |
| "learning_rate": 4.920542537577813e-05, | |
| "loss": 0.5587, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.04777569715066552, | |
| "grad_norm": 7.641439437866211, | |
| "learning_rate": 4.920458187830019e-05, | |
| "loss": 0.9166, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.04782630699934207, | |
| "grad_norm": 11.967202186584473, | |
| "learning_rate": 4.920373838082225e-05, | |
| "loss": 0.6675, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.047876916848018626, | |
| "grad_norm": 3.4140710830688477, | |
| "learning_rate": 4.92028948833443e-05, | |
| "loss": 0.5182, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.04792752669669518, | |
| "grad_norm": 11.876256942749023, | |
| "learning_rate": 4.920205138586636e-05, | |
| "loss": 0.5383, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.04797813654537173, | |
| "grad_norm": 3.3766562938690186, | |
| "learning_rate": 4.9201207888388414e-05, | |
| "loss": 0.4677, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.048028746394048284, | |
| "grad_norm": 4.277167320251465, | |
| "learning_rate": 4.9200364390910475e-05, | |
| "loss": 0.4177, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.04807935624272484, | |
| "grad_norm": 5.8467936515808105, | |
| "learning_rate": 4.9199520893432535e-05, | |
| "loss": 0.5603, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.04812996609140139, | |
| "grad_norm": 5.7047038078308105, | |
| "learning_rate": 4.919867739595459e-05, | |
| "loss": 0.416, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.04818057594007794, | |
| "grad_norm": 8.13704776763916, | |
| "learning_rate": 4.919783389847665e-05, | |
| "loss": 0.2622, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.048231185788754495, | |
| "grad_norm": 7.6252923011779785, | |
| "learning_rate": 4.91969904009987e-05, | |
| "loss": 0.9887, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.04828179563743104, | |
| "grad_norm": 6.5191497802734375, | |
| "learning_rate": 4.919614690352076e-05, | |
| "loss": 0.6701, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.048332405486107594, | |
| "grad_norm": 98.16575622558594, | |
| "learning_rate": 4.919530340604282e-05, | |
| "loss": 1.0357, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.048383015334784146, | |
| "grad_norm": 3.221017837524414, | |
| "learning_rate": 4.9194459908564876e-05, | |
| "loss": 0.396, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.0484336251834607, | |
| "grad_norm": 4.679788112640381, | |
| "learning_rate": 4.9193616411086936e-05, | |
| "loss": 0.3766, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.04848423503213725, | |
| "grad_norm": 7.324192047119141, | |
| "learning_rate": 4.919277291360899e-05, | |
| "loss": 0.6765, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.048534844880813804, | |
| "grad_norm": 5.692389488220215, | |
| "learning_rate": 4.919192941613105e-05, | |
| "loss": 0.7648, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.04858545472949036, | |
| "grad_norm": 8.046274185180664, | |
| "learning_rate": 4.919108591865311e-05, | |
| "loss": 0.717, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.04863606457816691, | |
| "grad_norm": 8.431305885314941, | |
| "learning_rate": 4.919024242117516e-05, | |
| "loss": 0.5176, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.04868667442684346, | |
| "grad_norm": 4.852190017700195, | |
| "learning_rate": 4.918939892369722e-05, | |
| "loss": 0.4792, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.048737284275520015, | |
| "grad_norm": 3.7859811782836914, | |
| "learning_rate": 4.918855542621928e-05, | |
| "loss": 0.4534, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.04878789412419657, | |
| "grad_norm": 3.5033421516418457, | |
| "learning_rate": 4.918771192874134e-05, | |
| "loss": 0.5004, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.04883850397287312, | |
| "grad_norm": 19.82146453857422, | |
| "learning_rate": 4.91868684312634e-05, | |
| "loss": 0.7809, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.04888911382154967, | |
| "grad_norm": 7.877125263214111, | |
| "learning_rate": 4.918602493378545e-05, | |
| "loss": 0.4266, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.048939723670226226, | |
| "grad_norm": 9.093170166015625, | |
| "learning_rate": 4.918518143630751e-05, | |
| "loss": 0.7279, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.04899033351890278, | |
| "grad_norm": 6.574698448181152, | |
| "learning_rate": 4.9184337938829564e-05, | |
| "loss": 0.4833, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.04904094336757933, | |
| "grad_norm": 4.608344078063965, | |
| "learning_rate": 4.9183494441351624e-05, | |
| "loss": 0.5268, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.049091553216255884, | |
| "grad_norm": 8.013690948486328, | |
| "learning_rate": 4.9182650943873685e-05, | |
| "loss": 0.5434, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.04914216306493244, | |
| "grad_norm": 4.216137409210205, | |
| "learning_rate": 4.918180744639574e-05, | |
| "loss": 0.4351, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.04919277291360899, | |
| "grad_norm": 7.3226776123046875, | |
| "learning_rate": 4.91809639489178e-05, | |
| "loss": 0.3895, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.04924338276228554, | |
| "grad_norm": 4.8168840408325195, | |
| "learning_rate": 4.918012045143985e-05, | |
| "loss": 0.3544, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.049293992610962095, | |
| "grad_norm": 8.331216812133789, | |
| "learning_rate": 4.917927695396191e-05, | |
| "loss": 0.2721, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.04934460245963865, | |
| "grad_norm": 10.902067184448242, | |
| "learning_rate": 4.917843345648397e-05, | |
| "loss": 0.4973, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.0493952123083152, | |
| "grad_norm": 5.606088161468506, | |
| "learning_rate": 4.9177589959006025e-05, | |
| "loss": 0.5057, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.04944582215699175, | |
| "grad_norm": 14.297886848449707, | |
| "learning_rate": 4.9176746461528086e-05, | |
| "loss": 0.6632, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.049496432005668306, | |
| "grad_norm": 5.292407989501953, | |
| "learning_rate": 4.917590296405014e-05, | |
| "loss": 0.8784, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.04954704185434486, | |
| "grad_norm": 2.2982068061828613, | |
| "learning_rate": 4.91750594665722e-05, | |
| "loss": 0.3005, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.049597651703021405, | |
| "grad_norm": 5.021894931793213, | |
| "learning_rate": 4.917421596909426e-05, | |
| "loss": 0.5095, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.04964826155169796, | |
| "grad_norm": 6.863503932952881, | |
| "learning_rate": 4.917337247161631e-05, | |
| "loss": 0.5115, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.04969887140037451, | |
| "grad_norm": 8.457030296325684, | |
| "learning_rate": 4.917252897413837e-05, | |
| "loss": 0.6833, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.04974948124905106, | |
| "grad_norm": 7.3646674156188965, | |
| "learning_rate": 4.9171685476660426e-05, | |
| "loss": 0.6812, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.049800091097727615, | |
| "grad_norm": 4.478794574737549, | |
| "learning_rate": 4.917084197918249e-05, | |
| "loss": 0.3277, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.04985070094640417, | |
| "grad_norm": 8.332141876220703, | |
| "learning_rate": 4.916999848170454e-05, | |
| "loss": 0.3486, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.04990131079508072, | |
| "grad_norm": 5.418234348297119, | |
| "learning_rate": 4.91691549842266e-05, | |
| "loss": 0.5811, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.049951920643757274, | |
| "grad_norm": 7.849460124969482, | |
| "learning_rate": 4.916831148674866e-05, | |
| "loss": 0.9629, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.050002530492433826, | |
| "grad_norm": 4.812062740325928, | |
| "learning_rate": 4.9167467989270714e-05, | |
| "loss": 0.4392, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.05005314034111038, | |
| "grad_norm": 7.80704402923584, | |
| "learning_rate": 4.9166624491792774e-05, | |
| "loss": 0.9341, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.05010375018978693, | |
| "grad_norm": 4.2226243019104, | |
| "learning_rate": 4.916578099431483e-05, | |
| "loss": 0.8323, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.050154360038463484, | |
| "grad_norm": 5.1408867835998535, | |
| "learning_rate": 4.916493749683689e-05, | |
| "loss": 0.3631, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.05020496988714004, | |
| "grad_norm": 6.943533897399902, | |
| "learning_rate": 4.916409399935895e-05, | |
| "loss": 0.4674, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.05025557973581659, | |
| "grad_norm": 2.5346226692199707, | |
| "learning_rate": 4.9163250501881e-05, | |
| "loss": 0.3056, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.05030618958449314, | |
| "grad_norm": 7.4098944664001465, | |
| "learning_rate": 4.916240700440306e-05, | |
| "loss": 0.7281, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.050356799433169695, | |
| "grad_norm": 5.00679349899292, | |
| "learning_rate": 4.9161563506925115e-05, | |
| "loss": 0.5875, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.05040740928184625, | |
| "grad_norm": 7.140769004821777, | |
| "learning_rate": 4.9160720009447175e-05, | |
| "loss": 0.4304, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.0504580191305228, | |
| "grad_norm": 4.408178806304932, | |
| "learning_rate": 4.9159876511969235e-05, | |
| "loss": 0.3655, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.05050862897919935, | |
| "grad_norm": 6.3958353996276855, | |
| "learning_rate": 4.915903301449129e-05, | |
| "loss": 0.5034, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.050559238827875906, | |
| "grad_norm": 11.404850959777832, | |
| "learning_rate": 4.915818951701335e-05, | |
| "loss": 0.6704, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.05060984867655246, | |
| "grad_norm": 22.483257293701172, | |
| "learning_rate": 4.91573460195354e-05, | |
| "loss": 0.5152, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.05060984867655246, | |
| "eval_cer": 0.1947745752490278, | |
| "eval_loss": 0.306193083524704, | |
| "eval_runtime": 2641.4126, | |
| "eval_samples_per_second": 5.934, | |
| "eval_steps_per_second": 0.371, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 118554, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "total_flos": 4.299636354947482e+19, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |