{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.031181202890604,
  "eval_steps": 500,
  "global_step": 63000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016368085506878688,
      "grad_norm": 0.5328027606010437,
      "learning_rate": 3.600654664484452e-07,
      "loss": 1.6968,
      "step": 100
    },
    {
      "epoch": 0.0032736171013757376,
      "grad_norm": 0.5594077706336975,
      "learning_rate": 7.237679578105111e-07,
      "loss": 1.6883,
      "step": 200
    },
    {
      "epoch": 0.004910425652063607,
      "grad_norm": 0.6636043787002563,
      "learning_rate": 1.087470449172577e-06,
      "loss": 1.6196,
      "step": 300
    },
    {
      "epoch": 0.006547234202751475,
      "grad_norm": 0.6200364828109741,
      "learning_rate": 1.4511729405346428e-06,
      "loss": 1.511,
      "step": 400
    },
    {
      "epoch": 0.008184042753439345,
      "grad_norm": 0.4777531623840332,
      "learning_rate": 1.8148754318967086e-06,
      "loss": 1.342,
      "step": 500
    },
    {
      "epoch": 0.009820851304127213,
      "grad_norm": 0.3041970133781433,
      "learning_rate": 2.1785779232587743e-06,
      "loss": 1.2154,
      "step": 600
    },
    {
      "epoch": 0.011457659854815082,
      "grad_norm": 0.21760690212249756,
      "learning_rate": 2.54228041462084e-06,
      "loss": 1.1427,
      "step": 700
    },
    {
      "epoch": 0.01309446840550295,
      "grad_norm": 0.22987280786037445,
      "learning_rate": 2.9059829059829063e-06,
      "loss": 1.0943,
      "step": 800
    },
    {
      "epoch": 0.014731276956190819,
      "grad_norm": 0.24943482875823975,
      "learning_rate": 3.269685397344972e-06,
      "loss": 1.0696,
      "step": 900
    },
    {
      "epoch": 0.01636808550687869,
      "grad_norm": 0.2619542181491852,
      "learning_rate": 3.633387888707038e-06,
      "loss": 1.0318,
      "step": 1000
    },
    {
      "epoch": 0.018004894057566556,
      "grad_norm": 0.2811136841773987,
      "learning_rate": 3.997090380069103e-06,
      "loss": 1.0035,
      "step": 1100
    },
    {
      "epoch": 0.019641702608254426,
      "grad_norm": 0.3045084476470947,
      "learning_rate": 4.36079287143117e-06,
      "loss": 0.9726,
      "step": 1200
    },
    {
      "epoch": 0.021278511158942293,
      "grad_norm": 0.3168332278728485,
      "learning_rate": 4.7244953627932355e-06,
      "loss": 0.971,
      "step": 1300
    },
    {
      "epoch": 0.022915319709630164,
      "grad_norm": 0.33685848116874695,
      "learning_rate": 5.088197854155301e-06,
      "loss": 0.952,
      "step": 1400
    },
    {
      "epoch": 0.02455212826031803,
      "grad_norm": 0.3198516368865967,
      "learning_rate": 5.451900345517367e-06,
      "loss": 0.9385,
      "step": 1500
    },
    {
      "epoch": 0.0261889368110059,
      "grad_norm": 0.3457159101963043,
      "learning_rate": 5.815602836879432e-06,
      "loss": 0.9291,
      "step": 1600
    },
    {
      "epoch": 0.02782574536169377,
      "grad_norm": 0.3343696594238281,
      "learning_rate": 6.179305328241499e-06,
      "loss": 0.9251,
      "step": 1700
    },
    {
      "epoch": 0.029462553912381638,
      "grad_norm": 0.4662475287914276,
      "learning_rate": 6.543007819603565e-06,
      "loss": 0.9328,
      "step": 1800
    },
    {
      "epoch": 0.03109936246306951,
      "grad_norm": 0.3559871017932892,
      "learning_rate": 6.906710310965631e-06,
      "loss": 0.9126,
      "step": 1900
    },
    {
      "epoch": 0.03273617101375738,
      "grad_norm": 0.3852447271347046,
      "learning_rate": 7.270412802327696e-06,
      "loss": 0.9024,
      "step": 2000
    },
    {
      "epoch": 0.034372979564445245,
      "grad_norm": 0.36482807993888855,
      "learning_rate": 7.634115293689762e-06,
      "loss": 0.9086,
      "step": 2100
    },
    {
      "epoch": 0.03600978811513311,
      "grad_norm": 0.39493420720100403,
      "learning_rate": 7.997817785051828e-06,
      "loss": 0.9144,
      "step": 2200
    },
    {
      "epoch": 0.03764659666582098,
      "grad_norm": 0.4406372010707855,
      "learning_rate": 8.361520276413894e-06,
      "loss": 0.9067,
      "step": 2300
    },
    {
      "epoch": 0.03928340521650885,
      "grad_norm": 0.43684300780296326,
      "learning_rate": 8.72522276777596e-06,
      "loss": 0.898,
      "step": 2400
    },
    {
      "epoch": 0.04092021376719672,
      "grad_norm": 0.4949699342250824,
      "learning_rate": 9.088925259138026e-06,
      "loss": 0.8893,
      "step": 2500
    },
    {
      "epoch": 0.04255702231788459,
      "grad_norm": 0.4759005308151245,
      "learning_rate": 9.452627750500092e-06,
      "loss": 0.9036,
      "step": 2600
    },
    {
      "epoch": 0.04419383086857246,
      "grad_norm": 0.4733336567878723,
      "learning_rate": 9.816330241862157e-06,
      "loss": 0.9046,
      "step": 2700
    },
    {
      "epoch": 0.04583063941926033,
      "grad_norm": 0.5515408515930176,
      "learning_rate": 1.0180032733224223e-05,
      "loss": 0.8899,
      "step": 2800
    },
    {
      "epoch": 0.047467447969948194,
      "grad_norm": 0.5026727318763733,
      "learning_rate": 1.054373522458629e-05,
      "loss": 0.8868,
      "step": 2900
    },
    {
      "epoch": 0.04910425652063606,
      "grad_norm": 0.5517929196357727,
      "learning_rate": 1.0907437715948354e-05,
      "loss": 0.8905,
      "step": 3000
    },
    {
      "epoch": 0.050741065071323935,
      "grad_norm": 0.5139409899711609,
      "learning_rate": 1.127114020731042e-05,
      "loss": 0.8711,
      "step": 3100
    },
    {
      "epoch": 0.0523778736220118,
      "grad_norm": 0.5762068033218384,
      "learning_rate": 1.1634842698672486e-05,
      "loss": 0.9,
      "step": 3200
    },
    {
      "epoch": 0.05401468217269967,
      "grad_norm": 0.5540242791175842,
      "learning_rate": 1.1998545190034552e-05,
      "loss": 0.8854,
      "step": 3300
    },
    {
      "epoch": 0.05565149072338754,
      "grad_norm": 0.6651942133903503,
      "learning_rate": 1.236224768139662e-05,
      "loss": 0.875,
      "step": 3400
    },
    {
      "epoch": 0.05728829927407541,
      "grad_norm": 0.6157256364822388,
      "learning_rate": 1.2725950172758685e-05,
      "loss": 0.87,
      "step": 3500
    },
    {
      "epoch": 0.058925107824763276,
      "grad_norm": 0.6638494729995728,
      "learning_rate": 1.3089652664120751e-05,
      "loss": 0.8666,
      "step": 3600
    },
    {
      "epoch": 0.06056191637545114,
      "grad_norm": 0.6535647511482239,
      "learning_rate": 1.3453355155482817e-05,
      "loss": 0.8675,
      "step": 3700
    },
    {
      "epoch": 0.06219872492613902,
      "grad_norm": 0.7346630692481995,
      "learning_rate": 1.3817057646844883e-05,
      "loss": 0.8724,
      "step": 3800
    },
    {
      "epoch": 0.06383553347682688,
      "grad_norm": 0.7002882957458496,
      "learning_rate": 1.4180760138206948e-05,
      "loss": 0.8476,
      "step": 3900
    },
    {
      "epoch": 0.06547234202751476,
      "grad_norm": 0.6632655262947083,
      "learning_rate": 1.4544462629569014e-05,
      "loss": 0.8641,
      "step": 4000
    },
    {
      "epoch": 0.06710915057820262,
      "grad_norm": 0.7253566384315491,
      "learning_rate": 1.490816512093108e-05,
      "loss": 0.8611,
      "step": 4100
    },
    {
      "epoch": 0.06874595912889049,
      "grad_norm": 0.7651970386505127,
      "learning_rate": 1.5271867612293146e-05,
      "loss": 0.8597,
      "step": 4200
    },
    {
      "epoch": 0.07038276767957836,
      "grad_norm": 0.6781213879585266,
      "learning_rate": 1.563557010365521e-05,
      "loss": 0.844,
      "step": 4300
    },
    {
      "epoch": 0.07201957623026622,
      "grad_norm": 0.7465602159500122,
      "learning_rate": 1.5999272595017275e-05,
      "loss": 0.8558,
      "step": 4400
    },
    {
      "epoch": 0.0736563847809541,
      "grad_norm": 0.7796695828437805,
      "learning_rate": 1.6362975086379343e-05,
      "loss": 0.8533,
      "step": 4500
    },
    {
      "epoch": 0.07529319333164196,
      "grad_norm": 0.7622010111808777,
      "learning_rate": 1.6726677577741408e-05,
      "loss": 0.8414,
      "step": 4600
    },
    {
      "epoch": 0.07693000188232983,
      "grad_norm": 0.7499621510505676,
      "learning_rate": 1.7090380069103472e-05,
      "loss": 0.8459,
      "step": 4700
    },
    {
      "epoch": 0.0785668104330177,
      "grad_norm": 0.7822730541229248,
      "learning_rate": 1.745408256046554e-05,
      "loss": 0.8468,
      "step": 4800
    },
    {
      "epoch": 0.08020361898370557,
      "grad_norm": 0.7850978970527649,
      "learning_rate": 1.7817785051827608e-05,
      "loss": 0.8603,
      "step": 4900
    },
    {
      "epoch": 0.08184042753439344,
      "grad_norm": 0.8370286822319031,
      "learning_rate": 1.8181487543189672e-05,
      "loss": 0.837,
      "step": 5000
    },
    {
      "epoch": 0.08347723608508131,
      "grad_norm": 0.821024477481842,
      "learning_rate": 1.854519003455174e-05,
      "loss": 0.8464,
      "step": 5100
    },
    {
      "epoch": 0.08511404463576917,
      "grad_norm": 0.8516008257865906,
      "learning_rate": 1.8908892525913805e-05,
      "loss": 0.837,
      "step": 5200
    },
    {
      "epoch": 0.08675085318645705,
      "grad_norm": 0.7816336750984192,
      "learning_rate": 1.927259501727587e-05,
      "loss": 0.8471,
      "step": 5300
    },
    {
      "epoch": 0.08838766173714492,
      "grad_norm": 0.8347124457359314,
      "learning_rate": 1.9636297508637937e-05,
      "loss": 0.8333,
      "step": 5400
    },
    {
      "epoch": 0.09002447028783278,
      "grad_norm": 0.8995541334152222,
      "learning_rate": 2e-05,
      "loss": 0.8341,
      "step": 5500
    },
    {
      "epoch": 0.09166127883852065,
      "grad_norm": 0.9787241816520691,
      "learning_rate": 1.9999984387425675e-05,
      "loss": 0.8431,
      "step": 5600
    },
    {
      "epoch": 0.09329808738920851,
      "grad_norm": 0.8093689680099487,
      "learning_rate": 1.999993754975144e-05,
      "loss": 0.8325,
      "step": 5700
    },
    {
      "epoch": 0.09493489593989639,
      "grad_norm": 0.9042837023735046,
      "learning_rate": 1.999985948712355e-05,
      "loss": 0.828,
      "step": 5800
    },
    {
      "epoch": 0.09657170449058426,
      "grad_norm": 0.9188331961631775,
      "learning_rate": 1.999975019978576e-05,
      "loss": 0.8291,
      "step": 5900
    },
    {
      "epoch": 0.09820851304127212,
      "grad_norm": 0.8699648380279541,
      "learning_rate": 1.9999609688079316e-05,
      "loss": 0.8277,
      "step": 6000
    },
    {
      "epoch": 0.09984532159196,
      "grad_norm": 0.9138243794441223,
      "learning_rate": 1.999943795244297e-05,
      "loss": 0.8367,
      "step": 6100
    },
    {
      "epoch": 0.10148213014264787,
      "grad_norm": 0.9293233156204224,
      "learning_rate": 1.9999234993412973e-05,
      "loss": 0.8281,
      "step": 6200
    },
    {
      "epoch": 0.10311893869333573,
      "grad_norm": 0.9346773624420166,
      "learning_rate": 1.999900081162306e-05,
      "loss": 0.8323,
      "step": 6300
    },
    {
      "epoch": 0.1047557472440236,
      "grad_norm": 0.9332927465438843,
      "learning_rate": 1.999873540780447e-05,
      "loss": 0.8259,
      "step": 6400
    },
    {
      "epoch": 0.10639255579471148,
      "grad_norm": 0.8887437582015991,
      "learning_rate": 1.9998438782785937e-05,
      "loss": 0.8305,
      "step": 6500
    },
    {
      "epoch": 0.10802936434539934,
      "grad_norm": 0.9184074401855469,
      "learning_rate": 1.999811093749367e-05,
      "loss": 0.829,
      "step": 6600
    },
    {
      "epoch": 0.10966617289608721,
      "grad_norm": 0.8532683849334717,
      "learning_rate": 1.999775187295137e-05,
      "loss": 0.8275,
      "step": 6700
    },
    {
      "epoch": 0.11130298144677508,
      "grad_norm": 0.9298515915870667,
      "learning_rate": 1.9997361590280225e-05,
      "loss": 0.8192,
      "step": 6800
    },
    {
      "epoch": 0.11293978999746294,
      "grad_norm": 0.9617123603820801,
      "learning_rate": 1.9996940090698896e-05,
      "loss": 0.8198,
      "step": 6900
    },
    {
      "epoch": 0.11457659854815082,
      "grad_norm": 1.0112113952636719,
      "learning_rate": 1.9996487375523524e-05,
      "loss": 0.8239,
      "step": 7000
    },
    {
      "epoch": 0.11621340709883868,
      "grad_norm": 0.9226319193840027,
      "learning_rate": 1.9996003446167718e-05,
      "loss": 0.8281,
      "step": 7100
    },
    {
      "epoch": 0.11785021564952655,
      "grad_norm": 1.0199968814849854,
      "learning_rate": 1.999548830414255e-05,
      "loss": 0.82,
      "step": 7200
    },
    {
      "epoch": 0.11948702420021443,
      "grad_norm": 0.9594390988349915,
      "learning_rate": 1.999494195105657e-05,
      "loss": 0.8139,
      "step": 7300
    },
    {
      "epoch": 0.12112383275090229,
      "grad_norm": 0.9685386419296265,
      "learning_rate": 1.9994364388615763e-05,
      "loss": 0.8193,
      "step": 7400
    },
    {
      "epoch": 0.12276064130159016,
      "grad_norm": 0.9797342419624329,
      "learning_rate": 1.999375561862358e-05,
      "loss": 0.815,
      "step": 7500
    },
    {
      "epoch": 0.12439744985227803,
      "grad_norm": 1.0541061162948608,
      "learning_rate": 1.9993115642980912e-05,
      "loss": 0.8239,
      "step": 7600
    },
    {
      "epoch": 0.1260342584029659,
      "grad_norm": 0.9543519616127014,
      "learning_rate": 1.99924444636861e-05,
      "loss": 0.8145,
      "step": 7700
    },
    {
      "epoch": 0.12767106695365377,
      "grad_norm": 0.9379186630249023,
      "learning_rate": 1.99917420828349e-05,
      "loss": 0.817,
      "step": 7800
    },
    {
      "epoch": 0.12930787550434164,
      "grad_norm": 0.9919012188911438,
      "learning_rate": 1.9991008502620515e-05,
      "loss": 0.8208,
      "step": 7900
    },
    {
      "epoch": 0.13094468405502951,
      "grad_norm": 0.9344952702522278,
      "learning_rate": 1.999024372533356e-05,
      "loss": 0.8167,
      "step": 8000
    },
    {
      "epoch": 0.13258149260571736,
      "grad_norm": 0.9583950638771057,
      "learning_rate": 1.9989447753362058e-05,
      "loss": 0.8125,
      "step": 8100
    },
    {
      "epoch": 0.13421830115640523,
      "grad_norm": 0.9945580363273621,
      "learning_rate": 1.998862058919145e-05,
      "loss": 0.8225,
      "step": 8200
    },
    {
      "epoch": 0.1358551097070931,
      "grad_norm": 0.9583763480186462,
      "learning_rate": 1.9987762235404566e-05,
      "loss": 0.8105,
      "step": 8300
    },
    {
      "epoch": 0.13749191825778098,
      "grad_norm": 1.025468349456787,
      "learning_rate": 1.998687269468162e-05,
      "loss": 0.8107,
      "step": 8400
    },
    {
      "epoch": 0.13912872680846886,
      "grad_norm": 1.0057779550552368,
      "learning_rate": 1.998595196980023e-05,
      "loss": 0.8138,
      "step": 8500
    },
    {
      "epoch": 0.14076553535915673,
      "grad_norm": 0.9300206899642944,
      "learning_rate": 1.9985000063635365e-05,
      "loss": 0.8207,
      "step": 8600
    },
    {
      "epoch": 0.14240234390984458,
      "grad_norm": 1.0241742134094238,
      "learning_rate": 1.9984016979159368e-05,
      "loss": 0.8046,
      "step": 8700
    },
    {
      "epoch": 0.14403915246053245,
      "grad_norm": 0.9688097238540649,
      "learning_rate": 1.9983002719441935e-05,
      "loss": 0.8193,
      "step": 8800
    },
    {
      "epoch": 0.14567596101122032,
      "grad_norm": 0.9877735376358032,
      "learning_rate": 1.9981957287650107e-05,
      "loss": 0.8003,
      "step": 8900
    },
    {
      "epoch": 0.1473127695619082,
      "grad_norm": 0.9533541202545166,
      "learning_rate": 1.9980880687048257e-05,
      "loss": 0.8089,
      "step": 9000
    },
    {
      "epoch": 0.14894957811259607,
      "grad_norm": 1.0934607982635498,
      "learning_rate": 1.997977292099809e-05,
      "loss": 0.7971,
      "step": 9100
    },
    {
      "epoch": 0.15058638666328392,
      "grad_norm": 0.9715205430984497,
      "learning_rate": 1.9978633992958624e-05,
      "loss": 0.8194,
      "step": 9200
    },
    {
      "epoch": 0.1522231952139718,
      "grad_norm": 0.9527362585067749,
      "learning_rate": 1.9977463906486175e-05,
      "loss": 0.8095,
      "step": 9300
    },
    {
      "epoch": 0.15386000376465966,
      "grad_norm": 1.0439358949661255,
      "learning_rate": 1.9976262665234357e-05,
      "loss": 0.7997,
      "step": 9400
    },
    {
      "epoch": 0.15549681231534754,
      "grad_norm": 1.1087926626205444,
      "learning_rate": 1.9975030272954066e-05,
      "loss": 0.8012,
      "step": 9500
    },
    {
      "epoch": 0.1571336208660354,
      "grad_norm": 1.0532102584838867,
      "learning_rate": 1.9973766733493458e-05,
      "loss": 0.8006,
      "step": 9600
    },
    {
      "epoch": 0.15877042941672329,
      "grad_norm": 0.9958882331848145,
      "learning_rate": 1.997247205079796e-05,
      "loss": 0.8138,
      "step": 9700
    },
    {
      "epoch": 0.16040723796741113,
      "grad_norm": 1.0133436918258667,
      "learning_rate": 1.9971146228910236e-05,
      "loss": 0.7942,
      "step": 9800
    },
    {
      "epoch": 0.162044046518099,
      "grad_norm": 0.9266718029975891,
      "learning_rate": 1.9969789271970187e-05,
      "loss": 0.7917,
      "step": 9900
    },
    {
      "epoch": 0.16368085506878688,
      "grad_norm": 1.0468189716339111,
      "learning_rate": 1.9968401184214924e-05,
      "loss": 0.8012,
      "step": 10000
    },
    {
      "epoch": 0.16531766361947475,
      "grad_norm": 1.0444200038909912,
      "learning_rate": 1.9966981969978782e-05,
      "loss": 0.7979,
      "step": 10100
    },
    {
      "epoch": 0.16695447217016263,
      "grad_norm": 1.0317082405090332,
      "learning_rate": 1.9965531633693268e-05,
      "loss": 0.8209,
      "step": 10200
    },
    {
      "epoch": 0.16859128072085047,
      "grad_norm": 1.0699563026428223,
      "learning_rate": 1.9964050179887088e-05,
      "loss": 0.8035,
      "step": 10300
    },
    {
      "epoch": 0.17022808927153835,
      "grad_norm": 0.9806187748908997,
      "learning_rate": 1.9962537613186096e-05,
      "loss": 0.7957,
      "step": 10400
    },
    {
      "epoch": 0.17186489782222622,
      "grad_norm": 1.0728228092193604,
      "learning_rate": 1.996099393831331e-05,
      "loss": 0.791,
      "step": 10500
    },
    {
      "epoch": 0.1735017063729141,
      "grad_norm": 1.028189778327942,
      "learning_rate": 1.9959419160088874e-05,
      "loss": 0.7964,
      "step": 10600
    },
    {
      "epoch": 0.17513851492360197,
      "grad_norm": 1.0126999616622925,
      "learning_rate": 1.9957813283430054e-05,
      "loss": 0.799,
      "step": 10700
    },
    {
      "epoch": 0.17677532347428984,
      "grad_norm": 0.96955406665802,
      "learning_rate": 1.995617631335123e-05,
      "loss": 0.8118,
      "step": 10800
    },
    {
      "epoch": 0.1784121320249777,
      "grad_norm": 1.0654776096343994,
      "learning_rate": 1.9954508254963865e-05,
      "loss": 0.8084,
      "step": 10900
    },
    {
      "epoch": 0.18004894057566556,
      "grad_norm": 0.9537600874900818,
      "learning_rate": 1.9952809113476493e-05,
      "loss": 0.8011,
      "step": 11000
    },
    {
      "epoch": 0.18168574912635344,
      "grad_norm": 0.9695281982421875,
      "learning_rate": 1.9951078894194708e-05,
      "loss": 0.8054,
      "step": 11100
    },
    {
      "epoch": 0.1833225576770413,
      "grad_norm": 1.0722426176071167,
      "learning_rate": 1.9949317602521144e-05,
      "loss": 0.7917,
      "step": 11200
    },
    {
      "epoch": 0.18495936622772918,
      "grad_norm": 0.9706518054008484,
      "learning_rate": 1.9947525243955467e-05,
      "loss": 0.8055,
      "step": 11300
    },
    {
      "epoch": 0.18659617477841703,
      "grad_norm": 0.9769388437271118,
      "learning_rate": 1.994570182409434e-05,
      "loss": 0.7981,
      "step": 11400
    },
    {
      "epoch": 0.1882329833291049,
      "grad_norm": 0.9185972809791565,
      "learning_rate": 1.9943847348631415e-05,
      "loss": 0.7907,
      "step": 11500
    },
    {
      "epoch": 0.18986979187979278,
      "grad_norm": 1.0683258771896362,
      "learning_rate": 1.9941961823357322e-05,
      "loss": 0.8021,
      "step": 11600
    },
    {
      "epoch": 0.19150660043048065,
      "grad_norm": 0.9599470496177673,
      "learning_rate": 1.9940045254159644e-05,
      "loss": 0.7923,
      "step": 11700
    },
    {
      "epoch": 0.19314340898116852,
      "grad_norm": 0.9822320938110352,
      "learning_rate": 1.9938097647022895e-05,
      "loss": 0.7864,
      "step": 11800
    },
    {
      "epoch": 0.1947802175318564,
      "grad_norm": 1.180939793586731,
      "learning_rate": 1.9936119008028503e-05,
      "loss": 0.7841,
      "step": 11900
    },
    {
      "epoch": 0.19641702608254424,
      "grad_norm": 1.1611251831054688,
      "learning_rate": 1.9934109343354808e-05,
      "loss": 0.7855,
      "step": 12000
    },
    {
      "epoch": 0.19805383463323212,
      "grad_norm": 1.0176281929016113,
      "learning_rate": 1.9932068659277006e-05,
      "loss": 0.7936,
      "step": 12100
    },
    {
      "epoch": 0.19969064318392,
      "grad_norm": 1.05084228515625,
      "learning_rate": 1.992999696216717e-05,
      "loss": 0.7856,
      "step": 12200
    },
    {
      "epoch": 0.20132745173460787,
      "grad_norm": 1.1582859754562378,
      "learning_rate": 1.9927894258494204e-05,
      "loss": 0.8064,
      "step": 12300
    },
    {
      "epoch": 0.20296426028529574,
      "grad_norm": 0.9974379539489746,
      "learning_rate": 1.992576055482383e-05,
      "loss": 0.7923,
      "step": 12400
    },
    {
      "epoch": 0.2046010688359836,
      "grad_norm": 1.0076924562454224,
      "learning_rate": 1.9923595857818573e-05,
      "loss": 0.801,
      "step": 12500
    },
    {
      "epoch": 0.20623787738667146,
      "grad_norm": 1.104923129081726,
      "learning_rate": 1.9921400174237732e-05,
      "loss": 0.8053,
      "step": 12600
    },
    {
      "epoch": 0.20787468593735933,
      "grad_norm": 1.0884004831314087,
      "learning_rate": 1.9919173510937355e-05,
      "loss": 0.7948,
      "step": 12700
    },
    {
      "epoch": 0.2095114944880472,
      "grad_norm": 0.9803980588912964,
      "learning_rate": 1.9916915874870234e-05,
      "loss": 0.791,
      "step": 12800
    },
    {
      "epoch": 0.21114830303873508,
      "grad_norm": 1.0630168914794922,
      "learning_rate": 1.9914627273085876e-05,
      "loss": 0.7813,
      "step": 12900
    },
    {
      "epoch": 0.21278511158942295,
      "grad_norm": 1.0575711727142334,
      "learning_rate": 1.9912307712730468e-05,
      "loss": 0.7862,
      "step": 13000
    },
    {
      "epoch": 0.2144219201401108,
      "grad_norm": 1.0258235931396484,
      "learning_rate": 1.9909957201046875e-05,
      "loss": 0.7855,
      "step": 13100
    },
    {
      "epoch": 0.21605872869079867,
      "grad_norm": 0.970610499382019,
      "learning_rate": 1.9907575745374605e-05,
      "loss": 0.7845,
      "step": 13200
    },
    {
      "epoch": 0.21769553724148655,
      "grad_norm": 1.0707366466522217,
      "learning_rate": 1.9905163353149787e-05,
      "loss": 0.7986,
      "step": 13300
    },
    {
      "epoch": 0.21933234579217442,
      "grad_norm": 0.9396125674247742,
      "learning_rate": 1.9902720031905153e-05,
      "loss": 0.7798,
      "step": 13400
    },
    {
      "epoch": 0.2209691543428623,
      "grad_norm": 1.0123385190963745,
      "learning_rate": 1.9900245789270006e-05,
      "loss": 0.7866,
      "step": 13500
    },
    {
      "epoch": 0.22260596289355017,
      "grad_norm": 0.9208526015281677,
      "learning_rate": 1.989774063297021e-05,
      "loss": 0.79,
      "step": 13600
    },
    {
      "epoch": 0.22424277144423801,
      "grad_norm": 1.0145132541656494,
      "learning_rate": 1.989520457082815e-05,
      "loss": 0.7826,
      "step": 13700
    },
    {
      "epoch": 0.2258795799949259,
      "grad_norm": 0.9474859237670898,
      "learning_rate": 1.9892637610762723e-05,
      "loss": 0.7904,
      "step": 13800
    },
    {
      "epoch": 0.22751638854561376,
      "grad_norm": 0.997414767742157,
      "learning_rate": 1.9890039760789294e-05,
      "loss": 0.7863,
      "step": 13900
    },
    {
      "epoch": 0.22915319709630164,
      "grad_norm": 1.0312907695770264,
      "learning_rate": 1.9887411029019686e-05,
      "loss": 0.7825,
      "step": 14000
    },
    {
      "epoch": 0.2307900056469895,
      "grad_norm": 1.019665002822876,
      "learning_rate": 1.9884751423662162e-05,
      "loss": 0.7746,
      "step": 14100
    },
    {
      "epoch": 0.23242681419767736,
      "grad_norm": 0.9788889288902283,
      "learning_rate": 1.9882060953021375e-05,
      "loss": 0.7805,
      "step": 14200
    },
    {
      "epoch": 0.23406362274836523,
      "grad_norm": 1.1468379497528076,
      "learning_rate": 1.9879339625498356e-05,
      "loss": 0.7783,
      "step": 14300
    },
    {
      "epoch": 0.2357004312990531,
      "grad_norm": 0.9630206823348999,
      "learning_rate": 1.9876587449590496e-05,
      "loss": 0.7785,
      "step": 14400
    },
    {
      "epoch": 0.23733723984974098,
      "grad_norm": 1.0484507083892822,
      "learning_rate": 1.98738044338915e-05,
      "loss": 0.7577,
      "step": 14500
    },
    {
      "epoch": 0.23897404840042885,
      "grad_norm": 0.9262145161628723,
      "learning_rate": 1.987099058709138e-05,
      "loss": 0.7847,
      "step": 14600
    },
    {
      "epoch": 0.24061085695111672,
      "grad_norm": 1.0156426429748535,
      "learning_rate": 1.9868145917976412e-05,
      "loss": 0.7754,
      "step": 14700
    },
    {
      "epoch": 0.24224766550180457,
      "grad_norm": 1.0557153224945068,
      "learning_rate": 1.986527043542912e-05,
      "loss": 0.783,
      "step": 14800
    },
    {
      "epoch": 0.24388447405249244,
      "grad_norm": 0.9480391144752502,
      "learning_rate": 1.9862364148428243e-05,
      "loss": 0.7795,
      "step": 14900
    },
    {
      "epoch": 0.24552128260318032,
      "grad_norm": 1.1189950704574585,
      "learning_rate": 1.9859427066048694e-05,
      "loss": 0.773,
      "step": 15000
    },
    {
      "epoch": 0.2471580911538682,
      "grad_norm": 1.0406650304794312,
      "learning_rate": 1.985645919746157e-05,
      "loss": 0.7815,
      "step": 15100
    },
    {
      "epoch": 0.24879489970455607,
      "grad_norm": 1.0539467334747314,
      "learning_rate": 1.985346055193408e-05,
      "loss": 0.7832,
      "step": 15200
    },
    {
      "epoch": 0.2504317082552439,
      "grad_norm": 1.0707350969314575,
      "learning_rate": 1.9850431138829537e-05,
      "loss": 0.7775,
      "step": 15300
    },
    {
      "epoch": 0.2520685168059318,
      "grad_norm": 1.0518571138381958,
      "learning_rate": 1.9847370967607332e-05,
      "loss": 0.7692,
      "step": 15400
    },
    {
      "epoch": 0.25370532535661966,
      "grad_norm": 1.038328766822815,
      "learning_rate": 1.9844280047822892e-05,
      "loss": 0.7812,
      "step": 15500
    },
    {
      "epoch": 0.25534213390730753,
      "grad_norm": 1.0571229457855225,
      "learning_rate": 1.984115838912766e-05,
      "loss": 0.7773,
      "step": 15600
    },
    {
      "epoch": 0.2569789424579954,
      "grad_norm": 1.0450866222381592,
      "learning_rate": 1.9838006001269064e-05,
      "loss": 0.7789,
      "step": 15700
    },
    {
      "epoch": 0.2586157510086833,
      "grad_norm": 1.107710838317871,
      "learning_rate": 1.9834822894090478e-05,
      "loss": 0.7628,
      "step": 15800
    },
    {
      "epoch": 0.26025255955937115,
      "grad_norm": 1.0595227479934692,
      "learning_rate": 1.9831609077531205e-05,
      "loss": 0.7805,
      "step": 15900
    },
    {
      "epoch": 0.26188936811005903,
      "grad_norm": 1.0978327989578247,
      "learning_rate": 1.982836456162644e-05,
      "loss": 0.7779,
      "step": 16000
    },
    {
      "epoch": 0.2635261766607469,
      "grad_norm": 1.0871798992156982,
      "learning_rate": 1.982508935650722e-05,
      "loss": 0.7696,
      "step": 16100
    },
    {
      "epoch": 0.2651629852114347,
      "grad_norm": 1.0791369676589966,
      "learning_rate": 1.982178347240043e-05,
      "loss": 0.7701,
      "step": 16200
    },
    {
      "epoch": 0.2667997937621226,
      "grad_norm": 1.095301866531372,
      "learning_rate": 1.981844691962874e-05,
      "loss": 0.783,
      "step": 16300
    },
    {
      "epoch": 0.26843660231281047,
      "grad_norm": 1.1223257780075073,
      "learning_rate": 1.9815079708610588e-05,
      "loss": 0.7785,
      "step": 16400
    },
    {
      "epoch": 0.27007341086349834,
      "grad_norm": 1.0025781393051147,
      "learning_rate": 1.9811681849860137e-05,
      "loss": 0.7787,
      "step": 16500
    },
    {
      "epoch": 0.2717102194141862,
      "grad_norm": 1.1232304573059082,
      "learning_rate": 1.9808253353987252e-05,
      "loss": 0.7655,
      "step": 16600
    },
    {
      "epoch": 0.2733470279648741,
      "grad_norm": 0.9625865817070007,
      "learning_rate": 1.9804794231697464e-05,
      "loss": 0.785,
      "step": 16700
    },
    {
      "epoch": 0.27498383651556196,
      "grad_norm": 1.1022255420684814,
      "learning_rate": 1.980130449379193e-05,
      "loss": 0.7681,
      "step": 16800
    },
    {
      "epoch": 0.27662064506624984,
      "grad_norm": 1.0605260133743286,
      "learning_rate": 1.9797784151167417e-05,
      "loss": 0.7686,
      "step": 16900
    },
    {
      "epoch": 0.2782574536169377,
      "grad_norm": 1.0693503618240356,
      "learning_rate": 1.9794233214816237e-05,
      "loss": 0.7653,
      "step": 17000
    },
    {
      "epoch": 0.2798942621676256,
      "grad_norm": 1.0027199983596802,
      "learning_rate": 1.979065169582625e-05,
      "loss": 0.7802,
      "step": 17100
    },
    {
      "epoch": 0.28153107071831346,
      "grad_norm": 1.002388834953308,
      "learning_rate": 1.9787039605380792e-05,
      "loss": 0.7668,
      "step": 17200
    },
    {
      "epoch": 0.2831678792690013,
      "grad_norm": 1.0847641229629517,
      "learning_rate": 1.9783396954758682e-05,
      "loss": 0.7685,
      "step": 17300
    },
    {
      "epoch": 0.28480468781968915,
      "grad_norm": 1.1153062582015991,
      "learning_rate": 1.9779723755334142e-05,
      "loss": 0.7761,
      "step": 17400
    },
    {
      "epoch": 0.286441496370377,
      "grad_norm": 1.0675033330917358,
      "learning_rate": 1.9776020018576794e-05,
      "loss": 0.7637,
      "step": 17500
    },
    {
      "epoch": 0.2880783049210649,
      "grad_norm": 1.0875293016433716,
      "learning_rate": 1.9772285756051613e-05,
      "loss": 0.7689,
      "step": 17600
    },
    {
      "epoch": 0.28971511347175277,
      "grad_norm": 1.135380744934082,
      "learning_rate": 1.9768520979418885e-05,
      "loss": 0.7763,
      "step": 17700
    },
    {
      "epoch": 0.29135192202244065,
      "grad_norm": 1.0305795669555664,
      "learning_rate": 1.9764725700434183e-05,
      "loss": 0.7688,
      "step": 17800
    },
    {
      "epoch": 0.2929887305731285,
      "grad_norm": 1.0471090078353882,
      "learning_rate": 1.976089993094832e-05,
      "loss": 0.7573,
      "step": 17900
    },
    {
      "epoch": 0.2946255391238164,
      "grad_norm": 1.0096269845962524,
      "learning_rate": 1.9757043682907325e-05,
      "loss": 0.7622,
      "step": 18000
    },
    {
      "epoch": 0.29626234767450427,
      "grad_norm": 1.103242039680481,
      "learning_rate": 1.9753156968352388e-05,
      "loss": 0.7573,
      "step": 18100
    },
    {
      "epoch": 0.29789915622519214,
      "grad_norm": 1.1128453016281128,
      "learning_rate": 1.9749239799419827e-05,
      "loss": 0.7692,
      "step": 18200
    },
    {
      "epoch": 0.29953596477588,
      "grad_norm": 1.0762085914611816,
      "learning_rate": 1.974529218834106e-05,
      "loss": 0.7838,
      "step": 18300
    },
    {
      "epoch": 0.30117277332656783,
      "grad_norm": 1.0150110721588135,
      "learning_rate": 1.9741314147442573e-05,
      "loss": 0.773,
      "step": 18400
    },
    {
      "epoch": 0.3028095818772557,
      "grad_norm": 1.0824315547943115,
      "learning_rate": 1.9737305689145842e-05,
      "loss": 0.7636,
      "step": 18500
    },
    {
      "epoch": 0.3044463904279436,
      "grad_norm": 1.2597285509109497,
      "learning_rate": 1.973326682596735e-05,
      "loss": 0.7688,
      "step": 18600
    },
    {
      "epoch": 0.30608319897863145,
      "grad_norm": 1.112971544265747,
      "learning_rate": 1.97291975705185e-05,
      "loss": 0.762,
      "step": 18700
    },
    {
      "epoch": 0.30772000752931933,
      "grad_norm": 1.11709725856781,
      "learning_rate": 1.9725097935505607e-05,
      "loss": 0.7674,
      "step": 18800
    },
    {
      "epoch": 0.3093568160800072,
      "grad_norm": 1.0609350204467773,
      "learning_rate": 1.972096793372984e-05,
      "loss": 0.7603,
      "step": 18900
    },
    {
      "epoch": 0.3109936246306951,
      "grad_norm": 1.111243486404419,
      "learning_rate": 1.9716807578087193e-05,
      "loss": 0.7572,
      "step": 19000
    },
    {
      "epoch": 0.31263043318138295,
      "grad_norm": 0.9914565086364746,
      "learning_rate": 1.971261688156843e-05,
      "loss": 0.7558,
      "step": 19100
    },
    {
      "epoch": 0.3142672417320708,
      "grad_norm": 1.030030369758606,
      "learning_rate": 1.9708395857259077e-05,
      "loss": 0.7558,
      "step": 19200
    },
    {
      "epoch": 0.3159040502827587,
      "grad_norm": 1.1039714813232422,
      "learning_rate": 1.9704144518339336e-05,
      "loss": 0.7507,
      "step": 19300
    },
    {
      "epoch": 0.31754085883344657,
      "grad_norm": 1.0048165321350098,
      "learning_rate": 1.969986287808408e-05,
      "loss": 0.7806,
      "step": 19400
    },
    {
      "epoch": 0.3191776673841344,
      "grad_norm": 1.2964001893997192,
      "learning_rate": 1.969555094986279e-05,
      "loss": 0.7504,
      "step": 19500
    },
    {
      "epoch": 0.32081447593482226,
      "grad_norm": 1.198273777961731,
      "learning_rate": 1.9691208747139527e-05,
      "loss": 0.7597,
      "step": 19600
    },
    {
      "epoch": 0.32245128448551014,
      "grad_norm": 1.0260130167007446,
      "learning_rate": 1.968683628347289e-05,
      "loss": 0.7571,
      "step": 19700
    },
    {
      "epoch": 0.324088093036198,
      "grad_norm": 1.1643099784851074,
      "learning_rate": 1.9682433572515952e-05,
      "loss": 0.7712,
      "step": 19800
    },
    {
      "epoch": 0.3257249015868859,
      "grad_norm": 1.1653162240982056,
      "learning_rate": 1.9678000628016248e-05,
      "loss": 0.7599,
      "step": 19900
    },
    {
      "epoch": 0.32736171013757376,
      "grad_norm": 1.5513461828231812,
      "learning_rate": 1.9673537463815718e-05,
      "loss": 0.7673,
      "step": 20000
    },
    {
      "epoch": 0.32899851868826163,
      "grad_norm": 1.138498306274414,
      "learning_rate": 1.9669044093850652e-05,
      "loss": 0.7521,
      "step": 20100
    },
    {
      "epoch": 0.3306353272389495,
      "grad_norm": 1.0548768043518066,
      "learning_rate": 1.9664520532151664e-05,
      "loss": 0.7596,
      "step": 20200
    },
    {
      "epoch": 0.3322721357896374,
      "grad_norm": 1.0597394704818726,
      "learning_rate": 1.965996679284365e-05,
      "loss": 0.7586,
      "step": 20300
    },
    {
      "epoch": 0.33390894434032525,
      "grad_norm": 1.1359139680862427,
      "learning_rate": 1.965538289014572e-05,
      "loss": 0.7618,
      "step": 20400
    },
    {
      "epoch": 0.3355457528910131,
      "grad_norm": 1.1026830673217773,
      "learning_rate": 1.9650768838371182e-05,
      "loss": 0.7613,
      "step": 20500
    },
    {
      "epoch": 0.33718256144170095,
      "grad_norm": 1.0065330266952515,
      "learning_rate": 1.9646124651927484e-05,
      "loss": 0.7394,
      "step": 20600
    },
    {
      "epoch": 0.3388193699923888,
      "grad_norm": 0.9368694424629211,
      "learning_rate": 1.964145034531616e-05,
      "loss": 0.761,
      "step": 20700
    },
    {
      "epoch": 0.3404561785430767,
      "grad_norm": 0.9686558246612549,
      "learning_rate": 1.9636745933132807e-05,
      "loss": 0.7597,
      "step": 20800
    },
    {
      "epoch": 0.34209298709376457,
      "grad_norm": 1.114066243171692,
      "learning_rate": 1.9632011430067024e-05,
      "loss": 0.7675,
      "step": 20900
    },
    {
      "epoch": 0.34372979564445244,
      "grad_norm": 1.1572498083114624,
      "learning_rate": 1.9627246850902363e-05,
      "loss": 0.7576,
      "step": 21000
    },
    {
      "epoch": 0.3453666041951403,
      "grad_norm": 1.0342215299606323,
      "learning_rate": 1.9622452210516296e-05,
      "loss": 0.7629,
      "step": 21100
    },
    {
      "epoch": 0.3470034127458282,
      "grad_norm": 1.0652525424957275,
      "learning_rate": 1.9617627523880158e-05,
      "loss": 0.7636,
      "step": 21200
    },
    {
      "epoch": 0.34864022129651606,
      "grad_norm": 1.048869013786316,
      "learning_rate": 1.9612772806059104e-05,
      "loss": 0.7625,
      "step": 21300
    },
    {
      "epoch": 0.35027702984720394,
      "grad_norm": 1.1751947402954102,
      "learning_rate": 1.9607888072212062e-05,
      "loss": 0.7475,
      "step": 21400
    },
    {
      "epoch": 0.3519138383978918,
      "grad_norm": 1.2830709218978882,
      "learning_rate": 1.9602973337591688e-05,
      "loss": 0.7558,
      "step": 21500
    },
    {
      "epoch": 0.3535506469485797,
      "grad_norm": 1.1591740846633911,
      "learning_rate": 1.9598028617544313e-05,
      "loss": 0.7435,
      "step": 21600
    },
    {
      "epoch": 0.3551874554992675,
      "grad_norm": 0.9801552295684814,
      "learning_rate": 1.95930539275099e-05,
      "loss": 0.7621,
      "step": 21700
    },
    {
      "epoch": 0.3568242640499554,
      "grad_norm": 1.126760721206665,
      "learning_rate": 1.958804928302199e-05,
      "loss": 0.7672,
      "step": 21800
    },
    {
      "epoch": 0.35846107260064325,
      "grad_norm": 1.0655152797698975,
      "learning_rate": 1.958301469970766e-05,
      "loss": 0.7491,
      "step": 21900
    },
    {
      "epoch": 0.3600978811513311,
      "grad_norm": 1.1613372564315796,
      "learning_rate": 1.9577950193287475e-05,
      "loss": 0.7733,
      "step": 22000
    },
    {
      "epoch": 0.361734689702019,
      "grad_norm": 0.9363147020339966,
      "learning_rate": 1.9572855779575427e-05,
      "loss": 0.7522,
      "step": 22100
    },
    {
      "epoch": 0.36337149825270687,
      "grad_norm": 1.1021246910095215,
      "learning_rate": 1.9567731474478903e-05,
      "loss": 0.7539,
      "step": 22200
    },
    {
      "epoch": 0.36500830680339474,
      "grad_norm": 1.084695816040039,
      "learning_rate": 1.9562577293998616e-05,
      "loss": 0.7514,
      "step": 22300
    },
    {
      "epoch": 0.3666451153540826,
      "grad_norm": 1.1221933364868164,
      "learning_rate": 1.9557393254228575e-05,
      "loss": 0.7608,
      "step": 22400
    },
    {
      "epoch": 0.3682819239047705,
      "grad_norm": 1.073371410369873,
      "learning_rate": 1.9552179371356024e-05,
      "loss": 0.7509,
      "step": 22500
    },
    {
      "epoch": 0.36991873245545837,
      "grad_norm": 1.124243140220642,
      "learning_rate": 1.9546935661661382e-05,
      "loss": 0.7552,
      "step": 22600
    },
    {
      "epoch": 0.37155554100614624,
      "grad_norm": 1.0397138595581055,
      "learning_rate": 1.9541662141518222e-05,
      "loss": 0.7451,
      "step": 22700
    },
    {
      "epoch": 0.37319234955683406,
      "grad_norm": 1.0600475072860718,
      "learning_rate": 1.9536358827393177e-05,
      "loss": 0.7358,
      "step": 22800
    },
    {
      "epoch": 0.37482915810752193,
      "grad_norm": 1.1461478471755981,
      "learning_rate": 1.953102573584593e-05,
      "loss": 0.7513,
      "step": 22900
    },
    {
      "epoch": 0.3764659666582098,
      "grad_norm": 1.093103051185608,
      "learning_rate": 1.952566288352914e-05,
      "loss": 0.7369,
      "step": 23000
    },
    {
      "epoch": 0.3781027752088977,
      "grad_norm": 1.2357380390167236,
      "learning_rate": 1.952027028718839e-05,
      "loss": 0.7628,
      "step": 23100
    },
    {
      "epoch": 0.37973958375958555,
      "grad_norm": 0.9737277030944824,
      "learning_rate": 1.9514847963662144e-05,
      "loss": 0.7358,
      "step": 23200
    },
    {
      "epoch": 0.3813763923102734,
      "grad_norm": 1.0810784101486206,
      "learning_rate": 1.9509395929881683e-05,
      "loss": 0.7431,
      "step": 23300
    },
    {
      "epoch": 0.3830132008609613,
      "grad_norm": 1.0600659847259521,
      "learning_rate": 1.9503914202871072e-05,
      "loss": 0.7465,
      "step": 23400
    },
    {
      "epoch": 0.3846500094116492,
      "grad_norm": 1.129676342010498,
      "learning_rate": 1.9498402799747077e-05,
      "loss": 0.746,
      "step": 23500
    },
    {
      "epoch": 0.38628681796233705,
      "grad_norm": 1.0627739429473877,
      "learning_rate": 1.9492861737719145e-05,
      "loss": 0.7517,
      "step": 23600
    },
    {
      "epoch": 0.3879236265130249,
      "grad_norm": 1.0382601022720337,
      "learning_rate": 1.9487291034089316e-05,
      "loss": 0.7466,
      "step": 23700
    },
    {
      "epoch": 0.3895604350637128,
      "grad_norm": 1.0782064199447632,
      "learning_rate": 1.9481690706252198e-05,
      "loss": 0.7436,
      "step": 23800
    },
    {
      "epoch": 0.39119724361440067,
      "grad_norm": 1.052713394165039,
      "learning_rate": 1.94760607716949e-05,
      "loss": 0.7363,
      "step": 23900
    },
    {
      "epoch": 0.3928340521650885,
      "grad_norm": 1.0485634803771973,
      "learning_rate": 1.947040124799697e-05,
      "loss": 0.7491,
      "step": 24000
    },
    {
      "epoch": 0.39447086071577636,
      "grad_norm": 1.1206567287445068,
      "learning_rate": 1.9464712152830368e-05,
      "loss": 0.7372,
      "step": 24100
    },
    {
      "epoch": 0.39610766926646424,
      "grad_norm": 1.0319308042526245,
      "learning_rate": 1.9458993503959368e-05,
      "loss": 0.7493,
      "step": 24200
    },
    {
      "epoch": 0.3977444778171521,
      "grad_norm": 1.1401089429855347,
      "learning_rate": 1.9453245319240533e-05,
      "loss": 0.7693,
      "step": 24300
    },
    {
      "epoch": 0.39938128636784,
      "grad_norm": 1.2440853118896484,
      "learning_rate": 1.944746761662266e-05,
      "loss": 0.7477,
      "step": 24400
    },
    {
      "epoch": 0.40101809491852786,
      "grad_norm": 1.1666104793548584,
      "learning_rate": 1.9441660414146715e-05,
      "loss": 0.7364,
      "step": 24500
    },
    {
      "epoch": 0.40265490346921573,
      "grad_norm": 1.0812019109725952,
      "learning_rate": 1.9435823729945768e-05,
      "loss": 0.7278,
      "step": 24600
    },
    {
      "epoch": 0.4042917120199036,
      "grad_norm": 1.1338680982589722,
      "learning_rate": 1.9429957582244957e-05,
      "loss": 0.7396,
      "step": 24700
    },
    {
      "epoch": 0.4059285205705915,
      "grad_norm": 1.0170310735702515,
      "learning_rate": 1.942406198936141e-05,
      "loss": 0.7373,
      "step": 24800
    },
    {
      "epoch": 0.40756532912127935,
      "grad_norm": 1.0910414457321167,
      "learning_rate": 1.941813696970421e-05,
      "loss": 0.743,
      "step": 24900
    },
    {
      "epoch": 0.4092021376719672,
      "grad_norm": 0.9840279221534729,
      "learning_rate": 1.9412182541774312e-05,
      "loss": 0.7432,
      "step": 25000
    },
    {
      "epoch": 0.41083894622265504,
      "grad_norm": 1.1482113599777222,
      "learning_rate": 1.9406198724164515e-05,
      "loss": 0.7457,
      "step": 25100
    },
    {
      "epoch": 0.4124757547733429,
      "grad_norm": 0.9647344946861267,
      "learning_rate": 1.9400185535559366e-05,
      "loss": 0.7494,
      "step": 25200
    },
    {
      "epoch": 0.4141125633240308,
      "grad_norm": 1.1271613836288452,
      "learning_rate": 1.9394142994735147e-05,
      "loss": 0.7358,
      "step": 25300
    },
    {
      "epoch": 0.41574937187471867,
      "grad_norm": 1.1209514141082764,
      "learning_rate": 1.9388071120559774e-05,
      "loss": 0.7477,
      "step": 25400
    },
    {
      "epoch": 0.41738618042540654,
      "grad_norm": 1.1221638917922974,
      "learning_rate": 1.9381969931992768e-05,
      "loss": 0.7401,
      "step": 25500
    },
    {
      "epoch": 0.4190229889760944,
      "grad_norm": 1.1341800689697266,
      "learning_rate": 1.937583944808518e-05,
      "loss": 0.7341,
      "step": 25600
    },
    {
      "epoch": 0.4206597975267823,
      "grad_norm": 1.0561330318450928,
      "learning_rate": 1.9369679687979538e-05,
      "loss": 0.7427,
      "step": 25700
    },
    {
      "epoch": 0.42229660607747016,
      "grad_norm": 1.0445774793624878,
      "learning_rate": 1.9363490670909788e-05,
      "loss": 0.7485,
      "step": 25800
    },
    {
      "epoch": 0.42393341462815803,
      "grad_norm": 1.1463161706924438,
      "learning_rate": 1.9357272416201214e-05,
      "loss": 0.7345,
      "step": 25900
    },
    {
      "epoch": 0.4255702231788459,
      "grad_norm": 1.1426818370819092,
      "learning_rate": 1.9351024943270426e-05,
      "loss": 0.7369,
      "step": 26000
    },
    {
      "epoch": 0.4272070317295338,
      "grad_norm": 1.0911140441894531,
      "learning_rate": 1.934474827162524e-05,
      "loss": 0.7472,
      "step": 26100
    },
    {
      "epoch": 0.4288438402802216,
      "grad_norm": 1.0775692462921143,
      "learning_rate": 1.9338442420864663e-05,
      "loss": 0.7401,
      "step": 26200
    },
    {
      "epoch": 0.4304806488309095,
      "grad_norm": 1.136518955230713,
      "learning_rate": 1.9332107410678805e-05,
      "loss": 0.7355,
      "step": 26300
    },
    {
      "epoch": 0.43211745738159735,
      "grad_norm": 1.085319995880127,
      "learning_rate": 1.932574326084883e-05,
      "loss": 0.7485,
      "step": 26400
    },
    {
      "epoch": 0.4337542659322852,
      "grad_norm": 1.034986972808838,
      "learning_rate": 1.9319349991246887e-05,
      "loss": 0.7422,
      "step": 26500
    },
    {
      "epoch": 0.4353910744829731,
      "grad_norm": 1.1199235916137695,
      "learning_rate": 1.9312927621836058e-05,
      "loss": 0.7362,
      "step": 26600
    },
    {
      "epoch": 0.43702788303366097,
      "grad_norm": 1.1646606922149658,
      "learning_rate": 1.930647617267029e-05,
      "loss": 0.7274,
      "step": 26700
    },
    {
      "epoch": 0.43866469158434884,
      "grad_norm": 1.1620571613311768,
      "learning_rate": 1.9299995663894325e-05,
      "loss": 0.7351,
      "step": 26800
    },
    {
      "epoch": 0.4403015001350367,
      "grad_norm": 1.1194571256637573,
      "learning_rate": 1.9293486115743646e-05,
      "loss": 0.7309,
      "step": 26900
    },
    {
      "epoch": 0.4419383086857246,
      "grad_norm": 1.1805561780929565,
      "learning_rate": 1.928694754854442e-05,
      "loss": 0.7378,
      "step": 27000
    },
    {
      "epoch": 0.44357511723641246,
      "grad_norm": 1.1845600605010986,
      "learning_rate": 1.9280379982713417e-05,
      "loss": 0.7319,
      "step": 27100
    },
    {
      "epoch": 0.44521192578710034,
      "grad_norm": 1.2962830066680908,
      "learning_rate": 1.927378343875796e-05,
      "loss": 0.7305,
      "step": 27200
    },
    {
      "epoch": 0.44684873433778816,
      "grad_norm": 1.0655794143676758,
      "learning_rate": 1.9267157937275854e-05,
      "loss": 0.7236,
      "step": 27300
    },
    {
      "epoch": 0.44848554288847603,
      "grad_norm": 1.0807515382766724,
      "learning_rate": 1.9260503498955326e-05,
      "loss": 0.7326,
      "step": 27400
    },
    {
      "epoch": 0.4501223514391639,
      "grad_norm": 1.0515137910842896,
      "learning_rate": 1.9253820144574958e-05,
      "loss": 0.7293,
      "step": 27500
    },
    {
      "epoch": 0.4517591599898518,
      "grad_norm": 1.103508710861206,
      "learning_rate": 1.9247107895003628e-05,
      "loss": 0.7473,
      "step": 27600
    },
    {
      "epoch": 0.45339596854053965,
      "grad_norm": 1.1016185283660889,
      "learning_rate": 1.924036677120043e-05,
      "loss": 0.7264,
      "step": 27700
    },
    {
      "epoch": 0.4550327770912275,
      "grad_norm": 1.0213091373443604,
      "learning_rate": 1.9233596794214623e-05,
      "loss": 0.7325,
      "step": 27800
    },
    {
      "epoch": 0.4566695856419154,
      "grad_norm": 1.1028705835342407,
      "learning_rate": 1.9226797985185565e-05,
      "loss": 0.7381,
      "step": 27900
    },
    {
      "epoch": 0.4583063941926033,
      "grad_norm": 1.0844396352767944,
      "learning_rate": 1.9219970365342634e-05,
      "loss": 0.7279,
      "step": 28000
    },
    {
      "epoch": 0.45994320274329115,
      "grad_norm": 1.037714958190918,
      "learning_rate": 1.9213113956005176e-05,
      "loss": 0.7433,
      "step": 28100
    },
    {
      "epoch": 0.461580011293979,
      "grad_norm": 1.2123370170593262,
      "learning_rate": 1.9206228778582435e-05,
      "loss": 0.7341,
      "step": 28200
    },
    {
      "epoch": 0.4632168198446669,
      "grad_norm": 1.013845682144165,
      "learning_rate": 1.9199314854573474e-05,
      "loss": 0.7369,
      "step": 28300
    },
    {
      "epoch": 0.4648536283953547,
      "grad_norm": 1.0552864074707031,
      "learning_rate": 1.9192372205567123e-05,
      "loss": 0.7202,
      "step": 28400
    },
    {
      "epoch": 0.4664904369460426,
      "grad_norm": 1.049025058746338,
      "learning_rate": 1.9185400853241917e-05,
      "loss": 0.7246,
      "step": 28500
    },
    {
      "epoch": 0.46812724549673046,
      "grad_norm": 1.0877737998962402,
      "learning_rate": 1.9178400819365994e-05,
      "loss": 0.7261,
      "step": 28600
    },
    {
      "epoch": 0.46976405404741833,
      "grad_norm": 1.099348783493042,
      "learning_rate": 1.9171372125797072e-05,
      "loss": 0.7327,
      "step": 28700
    },
    {
      "epoch": 0.4714008625981062,
      "grad_norm": 1.1000944375991821,
      "learning_rate": 1.916431479448235e-05,
      "loss": 0.7305,
      "step": 28800
    },
    {
      "epoch": 0.4730376711487941,
      "grad_norm": 1.0979351997375488,
      "learning_rate": 1.9157228847458446e-05,
      "loss": 0.7279,
      "step": 28900
    },
    {
      "epoch": 0.47467447969948195,
      "grad_norm": 1.0918766260147095,
      "learning_rate": 1.9150114306851336e-05,
      "loss": 0.7215,
      "step": 29000
    },
    {
      "epoch": 0.47631128825016983,
      "grad_norm": 1.109971046447754,
      "learning_rate": 1.9142971194876284e-05,
      "loss": 0.7322,
      "step": 29100
    },
    {
      "epoch": 0.4779480968008577,
      "grad_norm": 1.1282057762145996,
      "learning_rate": 1.913579953383776e-05,
      "loss": 0.7257,
      "step": 29200
    },
    {
      "epoch": 0.4795849053515456,
      "grad_norm": 1.1076371669769287,
      "learning_rate": 1.912859934612938e-05,
      "loss": 0.7516,
      "step": 29300
    },
    {
      "epoch": 0.48122171390223345,
      "grad_norm": 1.1480896472930908,
      "learning_rate": 1.9121370654233843e-05,
      "loss": 0.728,
      "step": 29400
    },
    {
      "epoch": 0.48285852245292127,
      "grad_norm": 1.1083163022994995,
      "learning_rate": 1.911411348072284e-05,
      "loss": 0.7235,
      "step": 29500
    },
    {
      "epoch": 0.48449533100360914,
      "grad_norm": 1.2141623497009277,
      "learning_rate": 1.9106827848257007e-05,
      "loss": 0.7237,
      "step": 29600
    },
    {
      "epoch": 0.486132139554297,
      "grad_norm": 1.0334457159042358,
      "learning_rate": 1.9099513779585836e-05,
      "loss": 0.7306,
      "step": 29700
    },
    {
      "epoch": 0.4877689481049849,
      "grad_norm": 1.1086657047271729,
      "learning_rate": 1.909217129754762e-05,
      "loss": 0.7295,
      "step": 29800
    },
    {
      "epoch": 0.48940575665567276,
      "grad_norm": 1.0128360986709595,
      "learning_rate": 1.908480042506937e-05,
      "loss": 0.733,
      "step": 29900
    },
    {
      "epoch": 0.49104256520636064,
      "grad_norm": 1.1484946012496948,
      "learning_rate": 1.907740118516674e-05,
      "loss": 0.7396,
      "step": 30000
    },
    {
      "epoch": 0.4926793737570485,
      "grad_norm": 1.031750202178955,
      "learning_rate": 1.9069973600943962e-05,
      "loss": 0.7204,
      "step": 30100
    },
    {
      "epoch": 0.4943161823077364,
      "grad_norm": 1.1274133920669556,
      "learning_rate": 1.9062517695593792e-05,
      "loss": 0.7235,
      "step": 30200
    },
    {
      "epoch": 0.49595299085842426,
      "grad_norm": 1.1863317489624023,
      "learning_rate": 1.9055033492397396e-05,
      "loss": 0.7329,
      "step": 30300
    },
    {
      "epoch": 0.49758979940911213,
      "grad_norm": 1.0985053777694702,
      "learning_rate": 1.9047521014724303e-05,
      "loss": 0.7341,
      "step": 30400
    },
    {
      "epoch": 0.4992266079598,
      "grad_norm": 1.136760950088501,
      "learning_rate": 1.9039980286032353e-05,
      "loss": 0.7189,
      "step": 30500
    },
    {
      "epoch": 0.5008634165104878,
      "grad_norm": 1.0787100791931152,
      "learning_rate": 1.9032411329867573e-05,
      "loss": 0.7298,
      "step": 30600
    },
    {
      "epoch": 0.5025002250611758,
      "grad_norm": 1.3436377048492432,
      "learning_rate": 1.902481416986414e-05,
      "loss": 0.719,
      "step": 30700
    },
    {
      "epoch": 0.5041370336118636,
      "grad_norm": 1.1863504648208618,
      "learning_rate": 1.9017188829744305e-05,
      "loss": 0.7125,
      "step": 30800
    },
    {
      "epoch": 0.5057738421625515,
      "grad_norm": 1.0385360717773438,
      "learning_rate": 1.90095353333183e-05,
      "loss": 0.7297,
      "step": 30900
    },
    {
      "epoch": 0.5074106507132393,
      "grad_norm": 1.1736425161361694,
      "learning_rate": 1.9001853704484285e-05,
      "loss": 0.7205,
      "step": 31000
    },
    {
      "epoch": 0.5090474592639272,
      "grad_norm": 1.0939114093780518,
      "learning_rate": 1.899414396722826e-05,
      "loss": 0.741,
      "step": 31100
    },
    {
      "epoch": 0.5106842678146151,
      "grad_norm": 1.3368091583251953,
      "learning_rate": 1.8986406145623996e-05,
      "loss": 0.7277,
      "step": 31200
    },
    {
      "epoch": 0.5123210763653029,
      "grad_norm": 1.1556004285812378,
      "learning_rate": 1.897864026383295e-05,
      "loss": 0.7383,
      "step": 31300
    },
    {
      "epoch": 0.5139578849159908,
      "grad_norm": 1.2308059930801392,
      "learning_rate": 1.897084634610421e-05,
      "loss": 0.7188,
      "step": 31400
    },
    {
      "epoch": 0.5155946934666786,
      "grad_norm": 1.1211739778518677,
      "learning_rate": 1.8963024416774393e-05,
      "loss": 0.7241,
      "step": 31500
    },
    {
      "epoch": 0.5172315020173666,
      "grad_norm": 1.1302770376205444,
      "learning_rate": 1.8955174500267596e-05,
      "loss": 0.7207,
      "step": 31600
    },
    {
      "epoch": 0.5188683105680544,
      "grad_norm": 1.1893266439437866,
      "learning_rate": 1.8947296621095297e-05,
      "loss": 0.7088,
      "step": 31700
    },
    {
      "epoch": 0.5205051191187423,
      "grad_norm": 1.2034817934036255,
      "learning_rate": 1.893939080385629e-05,
      "loss": 0.7225,
      "step": 31800
    },
    {
      "epoch": 0.5221419276694301,
      "grad_norm": 1.0935208797454834,
      "learning_rate": 1.8931457073236612e-05,
      "loss": 0.7219,
      "step": 31900
    },
    {
      "epoch": 0.5237787362201181,
      "grad_norm": 1.2129491567611694,
      "learning_rate": 1.892349545400945e-05,
      "loss": 0.7323,
      "step": 32000
    },
    {
      "epoch": 0.5254155447708059,
      "grad_norm": 1.0750499963760376,
      "learning_rate": 1.8915505971035077e-05,
      "loss": 0.7213,
      "step": 32100
    },
    {
      "epoch": 0.5270523533214938,
      "grad_norm": 1.1311250925064087,
      "learning_rate": 1.8907488649260775e-05,
      "loss": 0.7265,
      "step": 32200
    },
    {
      "epoch": 0.5286891618721816,
      "grad_norm": 1.1503121852874756,
      "learning_rate": 1.889944351372075e-05,
      "loss": 0.7177,
      "step": 32300
    },
    {
      "epoch": 0.5303259704228694,
      "grad_norm": 1.3034614324569702,
      "learning_rate": 1.8891370589536058e-05,
      "loss": 0.7118,
      "step": 32400
    },
    {
      "epoch": 0.5319627789735574,
      "grad_norm": 1.0626057386398315,
      "learning_rate": 1.8883269901914524e-05,
      "loss": 0.7205,
      "step": 32500
    },
    {
      "epoch": 0.5335995875242452,
      "grad_norm": 1.2290301322937012,
      "learning_rate": 1.8875141476150664e-05,
      "loss": 0.73,
      "step": 32600
    },
    {
      "epoch": 0.5352363960749331,
      "grad_norm": 1.2172757387161255,
      "learning_rate": 1.8866985337625615e-05,
      "loss": 0.7234,
      "step": 32700
    },
    {
      "epoch": 0.5368732046256209,
      "grad_norm": 1.0496524572372437,
      "learning_rate": 1.885880151180703e-05,
      "loss": 0.7127,
      "step": 32800
    },
    {
      "epoch": 0.5385100131763089,
      "grad_norm": 0.9903925061225891,
      "learning_rate": 1.8850590024249037e-05,
      "loss": 0.728,
      "step": 32900
    },
    {
      "epoch": 0.5401468217269967,
      "grad_norm": 1.2562659978866577,
      "learning_rate": 1.8842350900592122e-05,
      "loss": 0.7188,
      "step": 33000
    },
    {
      "epoch": 0.5417836302776846,
      "grad_norm": 1.2212430238723755,
      "learning_rate": 1.8834084166563072e-05,
      "loss": 0.7086,
      "step": 33100
    },
    {
      "epoch": 0.5434204388283724,
      "grad_norm": 1.1504745483398438,
      "learning_rate": 1.882578984797489e-05,
      "loss": 0.7198,
      "step": 33200
    },
    {
      "epoch": 0.5450572473790604,
      "grad_norm": 1.1029900312423706,
      "learning_rate": 1.8817467970726704e-05,
      "loss": 0.729,
      "step": 33300
    },
    {
      "epoch": 0.5466940559297482,
      "grad_norm": 1.1274054050445557,
      "learning_rate": 1.8809118560803704e-05,
      "loss": 0.7249,
      "step": 33400
    },
    {
      "epoch": 0.548330864480436,
      "grad_norm": 1.093854546546936,
      "learning_rate": 1.880074164427704e-05,
      "loss": 0.704,
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.5499676730311239, | |
| "grad_norm": 1.0846567153930664, | |
| "learning_rate": 1.879233724730377e-05, | |
| "loss": 0.7194, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.5516044815818117, | |
| "grad_norm": 1.35237455368042, | |
| "learning_rate": 1.8783905396126737e-05, | |
| "loss": 0.7205, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.5532412901324997, | |
| "grad_norm": 0.9714828133583069, | |
| "learning_rate": 1.8775446117074528e-05, | |
| "loss": 0.7334, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.5548780986831875, | |
| "grad_norm": 1.2619616985321045, | |
| "learning_rate": 1.8766959436561363e-05, | |
| "loss": 0.718, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.5565149072338754, | |
| "grad_norm": 1.036129355430603, | |
| "learning_rate": 1.8758445381087034e-05, | |
| "loss": 0.7191, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.5581517157845632, | |
| "grad_norm": 1.097095012664795, | |
| "learning_rate": 1.8749903977236802e-05, | |
| "loss": 0.7171, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.5597885243352512, | |
| "grad_norm": 1.1133558750152588, | |
| "learning_rate": 1.8741335251681328e-05, | |
| "loss": 0.7179, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.561425332885939, | |
| "grad_norm": 1.0562981367111206, | |
| "learning_rate": 1.8732739231176587e-05, | |
| "loss": 0.7201, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.5630621414366269, | |
| "grad_norm": 1.20978581905365, | |
| "learning_rate": 1.8724115942563773e-05, | |
| "loss": 0.7129, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.5646989499873147, | |
| "grad_norm": 1.0966860055923462, | |
| "learning_rate": 1.8715465412769243e-05, | |
| "loss": 0.715, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.5663357585380026, | |
| "grad_norm": 1.2173317670822144, | |
| "learning_rate": 1.87067876688044e-05, | |
| "loss": 0.7052, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.5679725670886905, | |
| "grad_norm": 1.126670241355896, | |
| "learning_rate": 1.869808273776563e-05, | |
| "loss": 0.7172, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.5696093756393783, | |
| "grad_norm": 1.0486496686935425, | |
| "learning_rate": 1.8689350646834207e-05, | |
| "loss": 0.7269, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.5712461841900662, | |
| "grad_norm": 1.1730561256408691, | |
| "learning_rate": 1.868059142327622e-05, | |
| "loss": 0.7191, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.572882992740754, | |
| "grad_norm": 1.1153805255889893, | |
| "learning_rate": 1.867180509444247e-05, | |
| "loss": 0.7124, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.574519801291442, | |
| "grad_norm": 1.200767159461975, | |
| "learning_rate": 1.8662991687768394e-05, | |
| "loss": 0.7342, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.5761566098421298, | |
| "grad_norm": 1.093985676765442, | |
| "learning_rate": 1.8654151230774e-05, | |
| "loss": 0.7073, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.5777934183928177, | |
| "grad_norm": 1.1902211904525757, | |
| "learning_rate": 1.8645283751063734e-05, | |
| "loss": 0.7147, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.5794302269435055, | |
| "grad_norm": 1.1363279819488525, | |
| "learning_rate": 1.863638927632644e-05, | |
| "loss": 0.7162, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.5810670354941935, | |
| "grad_norm": 1.2271382808685303, | |
| "learning_rate": 1.8627467834335243e-05, | |
| "loss": 0.7042, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.5827038440448813, | |
| "grad_norm": 1.1823738813400269, | |
| "learning_rate": 1.8618519452947484e-05, | |
| "loss": 0.7197, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.5843406525955691, | |
| "grad_norm": 1.042771577835083, | |
| "learning_rate": 1.8609544160104608e-05, | |
| "loss": 0.7103, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.585977461146257, | |
| "grad_norm": 1.2053323984146118, | |
| "learning_rate": 1.8600541983832114e-05, | |
| "loss": 0.7206, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.5876142696969449, | |
| "grad_norm": 1.2077679634094238, | |
| "learning_rate": 1.8591512952239416e-05, | |
| "loss": 0.7003, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.5892510782476328, | |
| "grad_norm": 1.2675883769989014, | |
| "learning_rate": 1.8582457093519806e-05, | |
| "loss": 0.7119, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.5908878867983206, | |
| "grad_norm": 1.102798342704773, | |
| "learning_rate": 1.857337443595034e-05, | |
| "loss": 0.7097, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.5925246953490085, | |
| "grad_norm": 1.0432052612304688, | |
| "learning_rate": 1.8564265007891747e-05, | |
| "loss": 0.7197, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.5941615038996964, | |
| "grad_norm": 1.1461999416351318, | |
| "learning_rate": 1.8555128837788356e-05, | |
| "loss": 0.7128, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.5957983124503843, | |
| "grad_norm": 1.1425740718841553, | |
| "learning_rate": 1.854596595416799e-05, | |
| "loss": 0.7221, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.5974351210010721, | |
| "grad_norm": 1.1499603986740112, | |
| "learning_rate": 1.8536776385641896e-05, | |
| "loss": 0.7118, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.59907192955176, | |
| "grad_norm": 1.1369038820266724, | |
| "learning_rate": 1.8527560160904628e-05, | |
| "loss": 0.7101, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.6007087381024478, | |
| "grad_norm": 1.3000248670578003, | |
| "learning_rate": 1.8518317308733987e-05, | |
| "loss": 0.7042, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.6023455466531357, | |
| "grad_norm": 1.193550944328308, | |
| "learning_rate": 1.8509047857990925e-05, | |
| "loss": 0.7143, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.6039823552038236, | |
| "grad_norm": 1.1038364171981812, | |
| "learning_rate": 1.849975183761943e-05, | |
| "loss": 0.6953, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.6056191637545114, | |
| "grad_norm": 1.2535215616226196, | |
| "learning_rate": 1.849042927664647e-05, | |
| "loss": 0.7021, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.6072559723051993, | |
| "grad_norm": 1.1770461797714233, | |
| "learning_rate": 1.848108020418188e-05, | |
| "loss": 0.6971, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.6088927808558872, | |
| "grad_norm": 1.3245750665664673, | |
| "learning_rate": 1.8471704649418272e-05, | |
| "loss": 0.7062, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.6105295894065751, | |
| "grad_norm": 1.064820408821106, | |
| "learning_rate": 1.8462302641630957e-05, | |
| "loss": 0.7247, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.6121663979572629, | |
| "grad_norm": 1.2426869869232178, | |
| "learning_rate": 1.8452874210177853e-05, | |
| "loss": 0.697, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.6138032065079508, | |
| "grad_norm": 1.0495688915252686, | |
| "learning_rate": 1.8443419384499367e-05, | |
| "loss": 0.7066, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.6154400150586387, | |
| "grad_norm": 1.0227185487747192, | |
| "learning_rate": 1.8433938194118332e-05, | |
| "loss": 0.6975, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.6170768236093266, | |
| "grad_norm": 1.1213784217834473, | |
| "learning_rate": 1.8424430668639916e-05, | |
| "loss": 0.7101, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.6187136321600144, | |
| "grad_norm": 1.3823000192642212, | |
| "learning_rate": 1.8414896837751497e-05, | |
| "loss": 0.7143, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.6203504407107022, | |
| "grad_norm": 1.280870795249939, | |
| "learning_rate": 1.8405336731222615e-05, | |
| "loss": 0.7137, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.6219872492613902, | |
| "grad_norm": 1.1578929424285889, | |
| "learning_rate": 1.839575037890483e-05, | |
| "loss": 0.7035, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.623624057812078, | |
| "grad_norm": 1.1784029006958008, | |
| "learning_rate": 1.838613781073169e-05, | |
| "loss": 0.7003, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.6252608663627659, | |
| "grad_norm": 1.5140550136566162, | |
| "learning_rate": 1.8376499056718563e-05, | |
| "loss": 0.7182, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.6268976749134537, | |
| "grad_norm": 1.1795947551727295, | |
| "learning_rate": 1.8366834146962613e-05, | |
| "loss": 0.707, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.6285344834641416, | |
| "grad_norm": 1.2156872749328613, | |
| "learning_rate": 1.8357143111642658e-05, | |
| "loss": 0.7041, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.6301712920148295, | |
| "grad_norm": 1.120609164237976, | |
| "learning_rate": 1.8347425981019104e-05, | |
| "loss": 0.7087, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.6318081005655174, | |
| "grad_norm": 1.0960373878479004, | |
| "learning_rate": 1.8337682785433838e-05, | |
| "loss": 0.7136, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.6334449091162052, | |
| "grad_norm": 1.2065433263778687, | |
| "learning_rate": 1.8327913555310125e-05, | |
| "loss": 0.7077, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.6350817176668931, | |
| "grad_norm": 1.158570647239685, | |
| "learning_rate": 1.8318118321152534e-05, | |
| "loss": 0.7199, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.636718526217581, | |
| "grad_norm": 1.1315112113952637, | |
| "learning_rate": 1.8308297113546834e-05, | |
| "loss": 0.7157, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.6383553347682688, | |
| "grad_norm": 1.567763328552246, | |
| "learning_rate": 1.829844996315989e-05, | |
| "loss": 0.7024, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.6399921433189567, | |
| "grad_norm": 1.3154592514038086, | |
| "learning_rate": 1.8288576900739573e-05, | |
| "loss": 0.7093, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.6416289518696445, | |
| "grad_norm": 1.2426626682281494, | |
| "learning_rate": 1.8278677957114666e-05, | |
| "loss": 0.7108, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.6432657604203325, | |
| "grad_norm": 1.2186305522918701, | |
| "learning_rate": 1.8268753163194773e-05, | |
| "loss": 0.704, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.6449025689710203, | |
| "grad_norm": 1.049307942390442, | |
| "learning_rate": 1.8258802549970206e-05, | |
| "loss": 0.7057, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.6465393775217082, | |
| "grad_norm": 1.3523504734039307, | |
| "learning_rate": 1.8248826148511908e-05, | |
| "loss": 0.6965, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.648176186072396, | |
| "grad_norm": 1.2402653694152832, | |
| "learning_rate": 1.823882398997133e-05, | |
| "loss": 0.704, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.649812994623084, | |
| "grad_norm": 1.3009974956512451, | |
| "learning_rate": 1.8228796105580373e-05, | |
| "loss": 0.6892, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.6514498031737718, | |
| "grad_norm": 1.161328673362732, | |
| "learning_rate": 1.821874252665125e-05, | |
| "loss": 0.7099, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.6530866117244597, | |
| "grad_norm": 1.5753206014633179, | |
| "learning_rate": 1.820866328457641e-05, | |
| "loss": 0.6958, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.6547234202751475, | |
| "grad_norm": 1.1261160373687744, | |
| "learning_rate": 1.8198558410828436e-05, | |
| "loss": 0.7048, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.6563602288258353, | |
| "grad_norm": 1.2303427457809448, | |
| "learning_rate": 1.818842793695995e-05, | |
| "loss": 0.7024, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.6579970373765233, | |
| "grad_norm": 1.2187303304672241, | |
| "learning_rate": 1.8178271894603502e-05, | |
| "loss": 0.696, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.6596338459272111, | |
| "grad_norm": 1.1081221103668213, | |
| "learning_rate": 1.8168090315471488e-05, | |
| "loss": 0.7082, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.661270654477899, | |
| "grad_norm": 1.1961265802383423, | |
| "learning_rate": 1.8157883231356036e-05, | |
| "loss": 0.6875, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.6629074630285868, | |
| "grad_norm": 1.1577361822128296, | |
| "learning_rate": 1.8147650674128927e-05, | |
| "loss": 0.7004, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.6645442715792748, | |
| "grad_norm": 1.1837248802185059, | |
| "learning_rate": 1.813739267574147e-05, | |
| "loss": 0.7084, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.6661810801299626, | |
| "grad_norm": 1.140136957168579, | |
| "learning_rate": 1.8127109268224414e-05, | |
| "loss": 0.6897, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.6678178886806505, | |
| "grad_norm": 1.132994532585144, | |
| "learning_rate": 1.811680048368785e-05, | |
| "loss": 0.6999, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.6694546972313383, | |
| "grad_norm": 1.184187889099121, | |
| "learning_rate": 1.8106466354321113e-05, | |
| "loss": 0.6994, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.6710915057820263, | |
| "grad_norm": 1.1196414232254028, | |
| "learning_rate": 1.809610691239268e-05, | |
| "loss": 0.7008, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.6727283143327141, | |
| "grad_norm": 1.1688846349716187, | |
| "learning_rate": 1.808572219025006e-05, | |
| "loss": 0.6954, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.6743651228834019, | |
| "grad_norm": 1.222205638885498, | |
| "learning_rate": 1.80753122203197e-05, | |
| "loss": 0.6918, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.6760019314340898, | |
| "grad_norm": 1.1374167203903198, | |
| "learning_rate": 1.8064877035106887e-05, | |
| "loss": 0.6906, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.6776387399847776, | |
| "grad_norm": 1.0707694292068481, | |
| "learning_rate": 1.8054416667195643e-05, | |
| "loss": 0.6943, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.6792755485354656, | |
| "grad_norm": 1.1394332647323608, | |
| "learning_rate": 1.8043931149248625e-05, | |
| "loss": 0.7073, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.6809123570861534, | |
| "grad_norm": 1.118058443069458, | |
| "learning_rate": 1.803342051400701e-05, | |
| "loss": 0.6983, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.6825491656368413, | |
| "grad_norm": 1.3730331659317017, | |
| "learning_rate": 1.8022884794290417e-05, | |
| "loss": 0.6924, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.6841859741875291, | |
| "grad_norm": 1.1573492288589478, | |
| "learning_rate": 1.801232402299679e-05, | |
| "loss": 0.6964, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.6858227827382171, | |
| "grad_norm": 1.1315394639968872, | |
| "learning_rate": 1.80017382331023e-05, | |
| "loss": 0.693, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.6874595912889049, | |
| "grad_norm": 1.1479718685150146, | |
| "learning_rate": 1.799112745766122e-05, | |
| "loss": 0.6985, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.6890963998395928, | |
| "grad_norm": 1.1869304180145264, | |
| "learning_rate": 1.7980491729805858e-05, | |
| "loss": 0.7132, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.6907332083902806, | |
| "grad_norm": 1.322792887687683, | |
| "learning_rate": 1.796983108274644e-05, | |
| "loss": 0.7085, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.6923700169409684, | |
| "grad_norm": 1.1635984182357788, | |
| "learning_rate": 1.7959145549770985e-05, | |
| "loss": 0.7117, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.6940068254916564, | |
| "grad_norm": 1.1490191221237183, | |
| "learning_rate": 1.7948435164245236e-05, | |
| "loss": 0.697, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.6956436340423442, | |
| "grad_norm": 1.2376859188079834, | |
| "learning_rate": 1.7937699959612523e-05, | |
| "loss": 0.7079, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.6972804425930321, | |
| "grad_norm": 1.2555029392242432, | |
| "learning_rate": 1.7926939969393693e-05, | |
| "loss": 0.6895, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.6989172511437199, | |
| "grad_norm": 1.1793533563613892, | |
| "learning_rate": 1.7916155227186966e-05, | |
| "loss": 0.6784, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.7005540596944079, | |
| "grad_norm": 1.0882368087768555, | |
| "learning_rate": 1.7905345766667867e-05, | |
| "loss": 0.6875, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.7021908682450957, | |
| "grad_norm": 1.2925825119018555, | |
| "learning_rate": 1.789451162158909e-05, | |
| "loss": 0.7072, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.7038276767957836, | |
| "grad_norm": 1.2188570499420166, | |
| "learning_rate": 1.7883652825780418e-05, | |
| "loss": 0.7084, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.7054644853464714, | |
| "grad_norm": 1.2425892353057861, | |
| "learning_rate": 1.7872769413148602e-05, | |
| "loss": 0.7059, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.7071012938971594, | |
| "grad_norm": 1.3490030765533447, | |
| "learning_rate": 1.786186141767726e-05, | |
| "loss": 0.6861, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.7087381024478472, | |
| "grad_norm": 1.2493983507156372, | |
| "learning_rate": 1.785092887342677e-05, | |
| "loss": 0.6862, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.710374910998535, | |
| "grad_norm": 1.1606495380401611, | |
| "learning_rate": 1.7839971814534163e-05, | |
| "loss": 0.6959, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.7120117195492229, | |
| "grad_norm": 1.0867750644683838, | |
| "learning_rate": 1.7828990275213023e-05, | |
| "loss": 0.6838, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.7136485280999108, | |
| "grad_norm": 1.4481595754623413, | |
| "learning_rate": 1.781798428975336e-05, | |
| "loss": 0.6877, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.7152853366505987, | |
| "grad_norm": 1.0603893995285034, | |
| "learning_rate": 1.7806953892521536e-05, | |
| "loss": 0.6922, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.7169221452012865, | |
| "grad_norm": 1.1686676740646362, | |
| "learning_rate": 1.7795899117960126e-05, | |
| "loss": 0.6933, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.7185589537519744, | |
| "grad_norm": 1.423593282699585, | |
| "learning_rate": 1.7784820000587828e-05, | |
| "loss": 0.6947, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.7201957623026622, | |
| "grad_norm": 1.2158969640731812, | |
| "learning_rate": 1.7773716574999354e-05, | |
| "loss": 0.6832, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.7218325708533502, | |
| "grad_norm": 1.3259363174438477, | |
| "learning_rate": 1.776258887586531e-05, | |
| "loss": 0.6836, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.723469379404038, | |
| "grad_norm": 1.2114306688308716, | |
| "learning_rate": 1.775143693793211e-05, | |
| "loss": 0.6934, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.7251061879547259, | |
| "grad_norm": 1.0769015550613403, | |
| "learning_rate": 1.774026079602184e-05, | |
| "loss": 0.692, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.7267429965054137, | |
| "grad_norm": 1.098381519317627, | |
| "learning_rate": 1.7729060485032167e-05, | |
| "loss": 0.6929, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.7283798050561016, | |
| "grad_norm": 1.1960115432739258, | |
| "learning_rate": 1.7717836039936235e-05, | |
| "loss": 0.6895, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.7300166136067895, | |
| "grad_norm": 1.2899237871170044, | |
| "learning_rate": 1.7706587495782538e-05, | |
| "loss": 0.6891, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.7316534221574773, | |
| "grad_norm": 1.1849106550216675, | |
| "learning_rate": 1.769531488769482e-05, | |
| "loss": 0.6994, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.7332902307081652, | |
| "grad_norm": 1.0840647220611572, | |
| "learning_rate": 1.7684018250871967e-05, | |
| "loss": 0.6902, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.734927039258853, | |
| "grad_norm": 1.1262308359146118, | |
| "learning_rate": 1.7672697620587904e-05, | |
| "loss": 0.686, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.736563847809541, | |
| "grad_norm": 1.2281126976013184, | |
| "learning_rate": 1.7661353032191458e-05, | |
| "loss": 0.6971, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.7382006563602288, | |
| "grad_norm": 1.0803622007369995, | |
| "learning_rate": 1.7649984521106282e-05, | |
| "loss": 0.694, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.7398374649109167, | |
| "grad_norm": 1.4072610139846802, | |
| "learning_rate": 1.763859212283071e-05, | |
| "loss": 0.704, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.7414742734616045, | |
| "grad_norm": 1.2351950407028198, | |
| "learning_rate": 1.7627175872937686e-05, | |
| "loss": 0.6991, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.7431110820122925, | |
| "grad_norm": 1.1985889673233032, | |
| "learning_rate": 1.7615735807074616e-05, | |
| "loss": 0.6947, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.7447478905629803, | |
| "grad_norm": 1.1948813199996948, | |
| "learning_rate": 1.7604271960963274e-05, | |
| "loss": 0.6986, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.7463846991136681, | |
| "grad_norm": 1.2745295763015747, | |
| "learning_rate": 1.759278437039969e-05, | |
| "loss": 0.6989, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.748021507664356, | |
| "grad_norm": 1.1414821147918701, | |
| "learning_rate": 1.7581273071254038e-05, | |
| "loss": 0.6883, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.7496583162150439, | |
| "grad_norm": 1.1246697902679443, | |
| "learning_rate": 1.7569738099470524e-05, | |
| "loss": 0.6818, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.7512951247657318, | |
| "grad_norm": 1.1820296049118042, | |
| "learning_rate": 1.7558179491067263e-05, | |
| "loss": 0.7079, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.7529319333164196, | |
| "grad_norm": 1.1293789148330688, | |
| "learning_rate": 1.7546597282136186e-05, | |
| "loss": 0.696, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.7545687418671075, | |
| "grad_norm": 1.2405450344085693, | |
| "learning_rate": 1.753499150884291e-05, | |
| "loss": 0.6912, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.7562055504177954, | |
| "grad_norm": 1.2177417278289795, | |
| "learning_rate": 1.7523362207426634e-05, | |
| "loss": 0.6824, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.7578423589684833, | |
| "grad_norm": 1.124414086341858, | |
| "learning_rate": 1.7511709414200024e-05, | |
| "loss": 0.6868, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.7594791675191711, | |
| "grad_norm": 1.1439573764801025, | |
| "learning_rate": 1.7500033165549105e-05, | |
| "loss": 0.6882, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.761115976069859, | |
| "grad_norm": 1.1549428701400757, | |
| "learning_rate": 1.7488333497933133e-05, | |
| "loss": 0.681, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.7627527846205469, | |
| "grad_norm": 1.3092726469039917, | |
| "learning_rate": 1.7476610447884492e-05, | |
| "loss": 0.6973, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.7643895931712347, | |
| "grad_norm": 1.5812910795211792, | |
| "learning_rate": 1.7464864052008586e-05, | |
| "loss": 0.6855, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.7660264017219226, | |
| "grad_norm": 1.189775824546814, | |
| "learning_rate": 1.7453094346983707e-05, | |
| "loss": 0.6983, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.7676632102726104, | |
| "grad_norm": 1.3100470304489136, | |
| "learning_rate": 1.7441301369560934e-05, | |
| "loss": 0.6938, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.7693000188232983, | |
| "grad_norm": 1.227925419807434, | |
| "learning_rate": 1.7429485156564014e-05, | |
| "loss": 0.6762, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.7709368273739862, | |
| "grad_norm": 1.3295223712921143, | |
| "learning_rate": 1.7417645744889248e-05, | |
| "loss": 0.6823, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.7725736359246741, | |
| "grad_norm": 1.1091123819351196, | |
| "learning_rate": 1.740578317150538e-05, | |
| "loss": 0.6978, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.7742104444753619, | |
| "grad_norm": 1.2926867008209229, | |
| "learning_rate": 1.7393897473453462e-05, | |
| "loss": 0.6853, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.7758472530260498, | |
| "grad_norm": 1.279630422592163, | |
| "learning_rate": 1.738198868784677e-05, | |
| "loss": 0.6911, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.7774840615767377, | |
| "grad_norm": 1.1175949573516846, | |
| "learning_rate": 1.7370056851870665e-05, | |
| "loss": 0.687, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.7791208701274256, | |
| "grad_norm": 1.0889476537704468, | |
| "learning_rate": 1.7358102002782477e-05, | |
| "loss": 0.689, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.7807576786781134, | |
| "grad_norm": 1.1944537162780762, | |
| "learning_rate": 1.7346124177911402e-05, | |
| "loss": 0.6841, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.7823944872288013, | |
| "grad_norm": 1.208275556564331, | |
| "learning_rate": 1.7334123414658376e-05, | |
| "loss": 0.6777, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.7840312957794892, | |
| "grad_norm": 1.1608806848526, | |
| "learning_rate": 1.7322099750495964e-05, | |
| "loss": 0.6841, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.785668104330177, | |
| "grad_norm": 1.0674712657928467, | |
| "learning_rate": 1.731005322296823e-05, | |
| "loss": 0.6765, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.7873049128808649, | |
| "grad_norm": 1.1852935552597046, | |
| "learning_rate": 1.729798386969064e-05, | |
| "loss": 0.6968, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.7889417214315527, | |
| "grad_norm": 1.1918047666549683, | |
| "learning_rate": 1.728589172834993e-05, | |
| "loss": 0.6815, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.7905785299822407, | |
| "grad_norm": 1.3117504119873047, | |
| "learning_rate": 1.7273776836703985e-05, | |
| "loss": 0.6799, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.7922153385329285, | |
| "grad_norm": 1.2398260831832886, | |
| "learning_rate": 1.726163923258174e-05, | |
| "loss": 0.6869, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.7938521470836164, | |
| "grad_norm": 1.2091760635375977, | |
| "learning_rate": 1.724947895388304e-05, | |
| "loss": 0.6679, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.7954889556343042, | |
| "grad_norm": 1.1533339023590088, | |
| "learning_rate": 1.723729603857854e-05, | |
| "loss": 0.6877, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.7971257641849921, | |
| "grad_norm": 1.2629398107528687, | |
| "learning_rate": 1.7225090524709577e-05, | |
| "loss": 0.6878, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.79876257273568, | |
| "grad_norm": 1.202531099319458, | |
| "learning_rate": 1.7212862450388037e-05, | |
| "loss": 0.6911, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.8003993812863679, | |
| "grad_norm": 1.189326286315918, | |
| "learning_rate": 1.7200611853796278e-05, | |
| "loss": 0.6966, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.8020361898370557, | |
| "grad_norm": 1.2614778280258179, | |
| "learning_rate": 1.718833877318696e-05, | |
| "loss": 0.6952, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.8036729983877435, | |
| "grad_norm": 1.1864616870880127, | |
| "learning_rate": 1.7176043246882966e-05, | |
| "loss": 0.6756, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.8053098069384315, | |
| "grad_norm": 1.205569863319397, | |
| "learning_rate": 1.7163725313277255e-05, | |
| "loss": 0.6748, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.8069466154891193, | |
| "grad_norm": 1.2782241106033325, | |
| "learning_rate": 1.715138501083276e-05, | |
| "loss": 0.6903, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.8085834240398072, | |
| "grad_norm": 1.0571094751358032, | |
| "learning_rate": 1.7139022378082256e-05, | |
| "loss": 0.6871, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.810220232590495, | |
| "grad_norm": 1.3369005918502808, | |
| "learning_rate": 1.712663745362826e-05, | |
| "loss": 0.6746, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.811857041141183, | |
| "grad_norm": 1.2506871223449707, | |
| "learning_rate": 1.7114230276142866e-05, | |
| "loss": 0.6935, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.8134938496918708, | |
| "grad_norm": 1.3436931371688843, | |
| "learning_rate": 1.7101800884367676e-05, | |
| "loss": 0.6859, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.8151306582425587, | |
| "grad_norm": 1.3217076063156128, | |
| "learning_rate": 1.708934931711365e-05, | |
| "loss": 0.6766, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.8167674667932465, | |
| "grad_norm": 1.3521711826324463, | |
| "learning_rate": 1.7076875613261e-05, | |
| "loss": 0.6828, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.8184042753439345, | |
| "grad_norm": 1.1544018983840942, | |
| "learning_rate": 1.706437981175904e-05, | |
| "loss": 0.6866, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.8200410838946223, | |
| "grad_norm": 1.3795074224472046, | |
| "learning_rate": 1.7051861951626105e-05, | |
| "loss": 0.6893, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.8216778924453101, | |
| "grad_norm": 1.2545524835586548, | |
| "learning_rate": 1.7039322071949396e-05, | |
| "loss": 0.6865, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.823314700995998, | |
| "grad_norm": 1.3663312196731567, | |
| "learning_rate": 1.702676021188487e-05, | |
| "loss": 0.6858, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.8249515095466858, | |
| "grad_norm": 1.4371784925460815, | |
| "learning_rate": 1.701417641065713e-05, | |
| "loss": 0.6827, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.8265883180973738, | |
| "grad_norm": 1.465648889541626, | |
| "learning_rate": 1.7001570707559274e-05, | |
| "loss": 0.6813, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.8282251266480616, | |
| "grad_norm": 1.1045328378677368, | |
| "learning_rate": 1.69889431419528e-05, | |
| "loss": 0.6858, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.8298619351987495, | |
| "grad_norm": 1.1676952838897705, | |
| "learning_rate": 1.6976293753267467e-05, | |
| "loss": 0.662, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.8314987437494373, | |
| "grad_norm": 1.2377560138702393, | |
| "learning_rate": 1.6963622581001188e-05, | |
| "loss": 0.6853, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.8331355523001253, | |
| "grad_norm": 1.2052476406097412, | |
| "learning_rate": 1.6950929664719883e-05, | |
| "loss": 0.6898, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.8347723608508131, | |
| "grad_norm": 1.400944709777832, | |
| "learning_rate": 1.6938215044057363e-05, | |
| "loss": 0.6905, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.836409169401501, | |
| "grad_norm": 1.2622673511505127, | |
| "learning_rate": 1.6925478758715226e-05, | |
| "loss": 0.6651, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.8380459779521888, | |
| "grad_norm": 1.1664501428604126, | |
| "learning_rate": 1.691272084846272e-05, | |
| "loss": 0.6851, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.8396827865028766, | |
| "grad_norm": 1.2591482400894165, | |
| "learning_rate": 1.68999413531366e-05, | |
| "loss": 0.6936, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.8413195950535646, | |
| "grad_norm": 1.163874864578247, | |
| "learning_rate": 1.6887140312641036e-05, | |
| "loss": 0.6886, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.8429564036042524, | |
| "grad_norm": 1.2441082000732422, | |
| "learning_rate": 1.6874317766947458e-05, | |
| "loss": 0.6761, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.8445932121549403, | |
| "grad_norm": 1.1966642141342163, | |
| "learning_rate": 1.6861473756094464e-05, | |
| "loss": 0.6758, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.8462300207056281, | |
| "grad_norm": 1.1858773231506348, | |
| "learning_rate": 1.6848608320187668e-05, | |
| "loss": 0.6806, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.8478668292563161, | |
| "grad_norm": 1.1656018495559692, | |
| "learning_rate": 1.6835721499399583e-05, | |
| "loss": 0.6768, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.8495036378070039, | |
| "grad_norm": 1.2097491025924683, | |
| "learning_rate": 1.6822813333969495e-05, | |
| "loss": 0.6936, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.8511404463576918, | |
| "grad_norm": 1.4976009130477905, | |
| "learning_rate": 1.6809883864203352e-05, | |
| "loss": 0.6721, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.8527772549083796, | |
| "grad_norm": 1.3640004396438599, | |
| "learning_rate": 1.6796933130473606e-05, | |
| "loss": 0.6738, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.8544140634590676, | |
| "grad_norm": 1.2159740924835205, | |
| "learning_rate": 1.6783961173219116e-05, | |
| "loss": 0.6755, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.8560508720097554, | |
| "grad_norm": 1.23357355594635, | |
| "learning_rate": 1.677096803294502e-05, | |
| "loss": 0.6789, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.8576876805604432, | |
| "grad_norm": 1.2574186325073242, | |
| "learning_rate": 1.6757953750222586e-05, | |
| "loss": 0.6892, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.8593244891111311, | |
| "grad_norm": 1.2394073009490967, | |
| "learning_rate": 1.6744918365689106e-05, | |
| "loss": 0.6726, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.860961297661819, | |
| "grad_norm": 1.2098554372787476, | |
| "learning_rate": 1.6731861920047758e-05, | |
| "loss": 0.6714, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.8625981062125069, | |
| "grad_norm": 1.3548126220703125, | |
| "learning_rate": 1.6718784454067495e-05, | |
| "loss": 0.6849, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.8642349147631947, | |
| "grad_norm": 1.5218019485473633, | |
| "learning_rate": 1.670568600858289e-05, | |
| "loss": 0.6744, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.8658717233138826, | |
| "grad_norm": 1.3826264142990112, | |
| "learning_rate": 1.669256662449404e-05, | |
| "loss": 0.6762, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.8675085318645704, | |
| "grad_norm": 1.2154985666275024, | |
| "learning_rate": 1.667942634276642e-05, | |
| "loss": 0.6711, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.8691453404152584, | |
| "grad_norm": 1.3120452165603638, | |
| "learning_rate": 1.666626520443075e-05, | |
| "loss": 0.6788, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.8707821489659462, | |
| "grad_norm": 1.2221883535385132, | |
| "learning_rate": 1.665308325058288e-05, | |
| "loss": 0.6661, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.8724189575166341, | |
| "grad_norm": 1.385396957397461, | |
| "learning_rate": 1.6639880522383655e-05, | |
| "loss": 0.6714, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.8740557660673219, | |
| "grad_norm": 1.2685418128967285, | |
| "learning_rate": 1.6626657061058797e-05, | |
| "loss": 0.668, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.8756925746180098, | |
| "grad_norm": 1.513152837753296, | |
| "learning_rate": 1.661341290789875e-05, | |
| "loss": 0.6706, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.8773293831686977, | |
| "grad_norm": 1.2810958623886108, | |
| "learning_rate": 1.6600148104258594e-05, | |
| "loss": 0.6904, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.8789661917193855, | |
| "grad_norm": 1.2695286273956299, | |
| "learning_rate": 1.6586862691557863e-05, | |
| "loss": 0.6733, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.8806030002700734, | |
| "grad_norm": 1.0760889053344727, | |
| "learning_rate": 1.6573556711280457e-05, | |
| "loss": 0.6743, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.8822398088207613, | |
| "grad_norm": 1.3402081727981567, | |
| "learning_rate": 1.6560230204974502e-05, | |
| "loss": 0.6706, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.8838766173714492, | |
| "grad_norm": 1.191873550415039, | |
| "learning_rate": 1.654688321425221e-05, | |
| "loss": 0.6764, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.885513425922137, | |
| "grad_norm": 1.1215344667434692, | |
| "learning_rate": 1.6533515780789758e-05, | |
| "loss": 0.6857, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.8871502344728249, | |
| "grad_norm": 1.1322293281555176, | |
| "learning_rate": 1.6520127946327155e-05, | |
| "loss": 0.6723, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.8887870430235127, | |
| "grad_norm": 1.7162648439407349, | |
| "learning_rate": 1.6506719752668115e-05, | |
| "loss": 0.679, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.8904238515742007, | |
| "grad_norm": 1.5632336139678955, | |
| "learning_rate": 1.6493291241679922e-05, | |
| "loss": 0.6807, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.8920606601248885, | |
| "grad_norm": 1.0530614852905273, | |
| "learning_rate": 1.6479842455293297e-05, | |
| "loss": 0.6681, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.8936974686755763, | |
| "grad_norm": 1.2179269790649414, | |
| "learning_rate": 1.6466373435502276e-05, | |
| "loss": 0.6614, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.8953342772262642, | |
| "grad_norm": 1.3225027322769165, | |
| "learning_rate": 1.6452884224364082e-05, | |
| "loss": 0.671, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.8969710857769521, | |
| "grad_norm": 1.3610303401947021, | |
| "learning_rate": 1.6439374863998966e-05, | |
| "loss": 0.6801, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.89860789432764, | |
| "grad_norm": 1.3277727365493774, | |
| "learning_rate": 1.6425845396590114e-05, | |
| "loss": 0.6746, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.9002447028783278, | |
| "grad_norm": 1.2963169813156128, | |
| "learning_rate": 1.6412295864383487e-05, | |
| "loss": 0.6817, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.9018815114290157, | |
| "grad_norm": 1.475885033607483, | |
| "learning_rate": 1.6398726309687704e-05, | |
| "loss": 0.6891, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.9035183199797036, | |
| "grad_norm": 1.2722758054733276, | |
| "learning_rate": 1.638513677487389e-05, | |
| "loss": 0.6709, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.9051551285303915, | |
| "grad_norm": 1.3521857261657715, | |
| "learning_rate": 1.637152730237558e-05, | |
| "loss": 0.6812, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.9067919370810793, | |
| "grad_norm": 1.2276744842529297, | |
| "learning_rate": 1.6357897934688555e-05, | |
| "loss": 0.6644, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.9084287456317672, | |
| "grad_norm": 1.5432332754135132, | |
| "learning_rate": 1.634424871437071e-05, | |
| "loss": 0.6817, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.910065554182455, | |
| "grad_norm": 1.2314627170562744, | |
| "learning_rate": 1.6330579684041946e-05, | |
| "loss": 0.6761, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.9117023627331429, | |
| "grad_norm": 1.473347544670105, | |
| "learning_rate": 1.631689088638401e-05, | |
| "loss": 0.6587, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.9133391712838308, | |
| "grad_norm": 1.4029542207717896, | |
| "learning_rate": 1.6303182364140376e-05, | |
| "loss": 0.6863, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.9149759798345186, | |
| "grad_norm": 1.1235482692718506, | |
| "learning_rate": 1.628945416011611e-05, | |
| "loss": 0.6717, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.9166127883852065, | |
| "grad_norm": 1.1514254808425903, | |
| "learning_rate": 1.6275706317177732e-05, | |
| "loss": 0.6815, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.9182495969358944, | |
| "grad_norm": 1.388074517250061, | |
| "learning_rate": 1.6261938878253086e-05, | |
| "loss": 0.6849, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.9198864054865823, | |
| "grad_norm": 1.1814851760864258, | |
| "learning_rate": 1.6248151886331208e-05, | |
| "loss": 0.6641, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.9215232140372701, | |
| "grad_norm": 1.4052802324295044, | |
| "learning_rate": 1.6234345384462174e-05, | |
| "loss": 0.6787, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.923160022587958, | |
| "grad_norm": 1.5508378744125366, | |
| "learning_rate": 1.6220519415757005e-05, | |
| "loss": 0.6808, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.9247968311386459, | |
| "grad_norm": 1.3127562999725342, | |
| "learning_rate": 1.620667402338749e-05, | |
| "loss": 0.6663, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.9264336396893338, | |
| "grad_norm": 1.2677356004714966, | |
| "learning_rate": 1.619280925058607e-05, | |
| "loss": 0.6723, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.9280704482400216, | |
| "grad_norm": 1.2480475902557373, | |
| "learning_rate": 1.61789251406457e-05, | |
| "loss": 0.6583, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.9297072567907094, | |
| "grad_norm": 1.1523864269256592, | |
| "learning_rate": 1.616502173691973e-05, | |
| "loss": 0.6858, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.9313440653413974, | |
| "grad_norm": 1.2443100214004517, | |
| "learning_rate": 1.615109908282174e-05, | |
| "loss": 0.6842, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.9329808738920852, | |
| "grad_norm": 1.172663927078247, | |
| "learning_rate": 1.6137157221825418e-05, | |
| "loss": 0.6708, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.9346176824427731, | |
| "grad_norm": 1.2049202919006348, | |
| "learning_rate": 1.6123196197464445e-05, | |
| "loss": 0.6665, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.9362544909934609, | |
| "grad_norm": 1.3395051956176758, | |
| "learning_rate": 1.6109216053332313e-05, | |
| "loss": 0.6593, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.9378912995441488, | |
| "grad_norm": 1.4670510292053223, | |
| "learning_rate": 1.6095216833082242e-05, | |
| "loss": 0.6715, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.9395281080948367, | |
| "grad_norm": 1.349523663520813, | |
| "learning_rate": 1.6081198580427e-05, | |
| "loss": 0.6724, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.9411649166455246, | |
| "grad_norm": 1.5846613645553589, | |
| "learning_rate": 1.606716133913879e-05, | |
| "loss": 0.6716, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.9428017251962124, | |
| "grad_norm": 1.1905144453048706, | |
| "learning_rate": 1.6053105153049103e-05, | |
| "loss": 0.6702, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.9444385337469003, | |
| "grad_norm": 1.4006574153900146, | |
| "learning_rate": 1.6039030066048592e-05, | |
| "loss": 0.6665, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.9460753422975882, | |
| "grad_norm": 1.3038159608840942, | |
| "learning_rate": 1.602493612208693e-05, | |
| "loss": 0.665, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.947712150848276, | |
| "grad_norm": 1.336591124534607, | |
| "learning_rate": 1.601082336517266e-05, | |
| "loss": 0.6572, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.9493489593989639, | |
| "grad_norm": 1.3096286058425903, | |
| "learning_rate": 1.5996691839373077e-05, | |
| "loss": 0.6651, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.9509857679496517, | |
| "grad_norm": 1.3385711908340454, | |
| "learning_rate": 1.5982541588814083e-05, | |
| "loss": 0.6708, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.9526225765003397, | |
| "grad_norm": 1.2425600290298462, | |
| "learning_rate": 1.596837265768004e-05, | |
| "loss": 0.6629, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.9542593850510275, | |
| "grad_norm": 1.1755977869033813, | |
| "learning_rate": 1.5954185090213653e-05, | |
| "loss": 0.6618, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.9558961936017154, | |
| "grad_norm": 1.5241588354110718, | |
| "learning_rate": 1.5939978930715808e-05, | |
| "loss": 0.6747, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.9575330021524032, | |
| "grad_norm": 1.113451361656189, | |
| "learning_rate": 1.5925754223545452e-05, | |
| "loss": 0.6779, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.9591698107030912, | |
| "grad_norm": 1.2721067667007446, | |
| "learning_rate": 1.5911511013119438e-05, | |
| "loss": 0.6586, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.960806619253779, | |
| "grad_norm": 1.5037124156951904, | |
| "learning_rate": 1.589724934391241e-05, | |
| "loss": 0.6646, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.9624434278044669, | |
| "grad_norm": 1.2813490629196167, | |
| "learning_rate": 1.588296926045664e-05, | |
| "loss": 0.6644, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.9640802363551547, | |
| "grad_norm": 1.2610142230987549, | |
| "learning_rate": 1.58686708073419e-05, | |
| "loss": 0.6717, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.9657170449058425, | |
| "grad_norm": 1.2408130168914795, | |
| "learning_rate": 1.585435402921532e-05, | |
| "loss": 0.6695, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.9673538534565305, | |
| "grad_norm": 1.4657983779907227, | |
| "learning_rate": 1.584001897078126e-05, | |
| "loss": 0.6777, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.9689906620072183, | |
| "grad_norm": 1.370548129081726, | |
| "learning_rate": 1.5825665676801145e-05, | |
| "loss": 0.6881, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.9706274705579062, | |
| "grad_norm": 1.3695186376571655, | |
| "learning_rate": 1.5811294192093353e-05, | |
| "loss": 0.6594, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.972264279108594, | |
| "grad_norm": 1.2767751216888428, | |
| "learning_rate": 1.5796904561533054e-05, | |
| "loss": 0.6661, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.973901087659282, | |
| "grad_norm": 1.293419361114502, | |
| "learning_rate": 1.578249683005209e-05, | |
| "loss": 0.6781, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.9755378962099698, | |
| "grad_norm": 1.5075045824050903, | |
| "learning_rate": 1.576807104263881e-05, | |
| "loss": 0.6706, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.9771747047606577, | |
| "grad_norm": 1.1597870588302612, | |
| "learning_rate": 1.5753627244337958e-05, | |
| "loss": 0.6709, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.9788115133113455, | |
| "grad_norm": 1.5488371849060059, | |
| "learning_rate": 1.5739165480250504e-05, | |
| "loss": 0.6611, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.9804483218620335, | |
| "grad_norm": 1.3339688777923584, | |
| "learning_rate": 1.5724685795533518e-05, | |
| "loss": 0.679, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.9820851304127213, | |
| "grad_norm": 1.3151462078094482, | |
| "learning_rate": 1.571018823540004e-05, | |
| "loss": 0.6636, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.9837219389634091, | |
| "grad_norm": 1.3205444812774658, | |
| "learning_rate": 1.5695672845118903e-05, | |
| "loss": 0.6623, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.985358747514097, | |
| "grad_norm": 1.294420599937439, | |
| "learning_rate": 1.5681139670014643e-05, | |
| "loss": 0.6666, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.9869955560647848, | |
| "grad_norm": 1.3142366409301758, | |
| "learning_rate": 1.566658875546731e-05, | |
| "loss": 0.6629, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.9886323646154728, | |
| "grad_norm": 1.3516416549682617, | |
| "learning_rate": 1.565202014691235e-05, | |
| "loss": 0.6664, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.9902691731661606, | |
| "grad_norm": 1.2360502481460571, | |
| "learning_rate": 1.5637433889840455e-05, | |
| "loss": 0.6608, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.9919059817168485, | |
| "grad_norm": 1.155104398727417, | |
| "learning_rate": 1.562283002979744e-05, | |
| "loss": 0.6676, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.9935427902675363, | |
| "grad_norm": 1.2880823612213135, | |
| "learning_rate": 1.560820861238407e-05, | |
| "loss": 0.6632, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.9951795988182243, | |
| "grad_norm": 1.2748744487762451, | |
| "learning_rate": 1.5593569683255936e-05, | |
| "loss": 0.6723, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.9968164073689121, | |
| "grad_norm": 1.2065379619598389, | |
| "learning_rate": 1.557891328812332e-05, | |
| "loss": 0.6831, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.9984532159196, | |
| "grad_norm": 1.143071174621582, | |
| "learning_rate": 1.5564239472751022e-05, | |
| "loss": 0.6656, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 1.0000818404275345, | |
| "grad_norm": 1.1476441621780396, | |
| "learning_rate": 1.5549548282958253e-05, | |
| "loss": 0.6591, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 1.0017186489782222, | |
| "grad_norm": 1.210295557975769, | |
| "learning_rate": 1.5534839764618477e-05, | |
| "loss": 0.6559, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 1.00335545752891, | |
| "grad_norm": 1.5003302097320557, | |
| "learning_rate": 1.5520113963659257e-05, | |
| "loss": 0.6615, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 1.004992266079598, | |
| "grad_norm": 1.235449194908142, | |
| "learning_rate": 1.550537092606212e-05, | |
| "loss": 0.6709, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 1.006629074630286, | |
| "grad_norm": 1.1739157438278198, | |
| "learning_rate": 1.549061069786243e-05, | |
| "loss": 0.668, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 1.0082658831809737, | |
| "grad_norm": 1.2646570205688477, | |
| "learning_rate": 1.5475833325149215e-05, | |
| "loss": 0.6553, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 1.0099026917316616, | |
| "grad_norm": 1.2951397895812988, | |
| "learning_rate": 1.546103885406504e-05, | |
| "loss": 0.6584, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 1.0115395002823495, | |
| "grad_norm": 1.2838189601898193, | |
| "learning_rate": 1.544622733080586e-05, | |
| "loss": 0.6518, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 1.0131763088330374, | |
| "grad_norm": 1.3708552122116089, | |
| "learning_rate": 1.543139880162088e-05, | |
| "loss": 0.6628, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 1.0148131173837251, | |
| "grad_norm": 1.301353931427002, | |
| "learning_rate": 1.54165533128124e-05, | |
| "loss": 0.6478, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 1.016449925934413, | |
| "grad_norm": 1.3044975996017456, | |
| "learning_rate": 1.5401690910735677e-05, | |
| "loss": 0.6439, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 1.018086734485101, | |
| "grad_norm": 1.4568370580673218, | |
| "learning_rate": 1.5386811641798785e-05, | |
| "loss": 0.6482, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 1.0197235430357887, | |
| "grad_norm": 1.3758224248886108, | |
| "learning_rate": 1.5371915552462466e-05, | |
| "loss": 0.663, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 1.0213603515864766, | |
| "grad_norm": 1.6428395509719849, | |
| "learning_rate": 1.535700268923998e-05, | |
| "loss": 0.6533, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 1.0229971601371646, | |
| "grad_norm": 1.3830885887145996, | |
| "learning_rate": 1.5342073098696956e-05, | |
| "loss": 0.6632, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 1.0246339686878525, | |
| "grad_norm": 1.426006555557251, | |
| "learning_rate": 1.5327126827451272e-05, | |
| "loss": 0.6491, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 1.0262707772385402, | |
| "grad_norm": 1.4166696071624756, | |
| "learning_rate": 1.531216392217288e-05, | |
| "loss": 0.6465, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 1.0279075857892281, | |
| "grad_norm": 1.224443793296814, | |
| "learning_rate": 1.529718442958367e-05, | |
| "loss": 0.6642, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 1.029544394339916, | |
| "grad_norm": 1.250406265258789, | |
| "learning_rate": 1.528218839645733e-05, | |
| "loss": 0.6516, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 1.031181202890604, | |
| "grad_norm": 1.2630037069320679, | |
| "learning_rate": 1.52671758696192e-05, | |
| "loss": 0.6649, | |
| "step": 63000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 183285, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.34907099427588e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
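
The file above is a Hugging Face `Trainer` checkpoint state (`trainer_state.json`): `log_history` holds one record per logging event (every `logging_steps` = 100 steps here), and the trailing fields record the run configuration (`max_steps`: 183285, `save_steps`: 1000, `train_batch_size`: 8). Below is a minimal sketch for inspecting such a state file, assuming the JSON is saved as `trainer_state.json`; the file path and the printed summary are illustrative assumptions, while the field names (`log_history`, `loss`, `learning_rate`, `step`, `global_step`, `max_steps`, `num_train_epochs`) are the ones that appear in the file itself.

```python
import json

# Load the Trainer state (the path is an assumption; adjust to your
# checkpoint directory, e.g. checkpoint-63000/trainer_state.json).
with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry is a dict like
# {"epoch": ..., "grad_norm": ..., "learning_rate": ..., "loss": ..., "step": ...}.
# Filter on "loss" so eval-only entries, if any, are skipped.
train_logs = [h for h in state["log_history"] if "loss" in h]

steps = [h["step"] for h in train_logs]
losses = [h["loss"] for h in train_logs]
lrs = [h["learning_rate"] for h in train_logs]

# Summarize progress relative to the configured schedule.
print(f"{len(train_logs)} logged points, step {state['global_step']}/{state['max_steps']} "
      f"({state['epoch']:.3f} of {state['num_train_epochs']} epochs)")
print(f"first/last loss: {losses[0]:.4f} -> {losses[-1]:.4f}")
print(f"first/last lr:   {lrs[0]:.3e} -> {lrs[-1]:.3e}")
```

Over the span shown here the training loss eases from about 0.72 at step 29,600 to about 0.66 at step 63,000, while the learning rate decays gently from roughly 1.91e-5 to 1.53e-5, consistent with a run just past its first epoch (epoch ≈ 1.031 at global step 63,000).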