| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9181102994761033, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002295275748690258, |
| "grad_norm": 2.0098984241485596, |
| "learning_rate": 1.4856966372720683e-06, |
| "loss": 2.0874, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.004590551497380516, |
| "grad_norm": 7.3466691970825195, |
| "learning_rate": 1.7773258340037704e-06, |
| "loss": 2.0765, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.006885827246070775, |
| "grad_norm": 64.36688232421875, |
| "learning_rate": 1.9369412440149804e-06, |
| "loss": 1.9359, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.009181102994761032, |
| "grad_norm": 30.22006607055664, |
| "learning_rate": 2.0539675208065135e-06, |
| "loss": 1.4178, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01147637874345129, |
| "grad_norm": 26.016658782958984, |
| "learning_rate": 2.1422693353722617e-06, |
| "loss": 1.0832, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01377165449214155, |
| "grad_norm": 20.14085578918457, |
| "learning_rate": 2.2142328937343315e-06, |
| "loss": 0.9394, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.01606693024083181, |
| "grad_norm": 37.867881774902344, |
| "learning_rate": 2.2761167318484284e-06, |
| "loss": 0.8149, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.018362205989522064, |
| "grad_norm": 29.71302604675293, |
| "learning_rate": 2.329517989724819e-06, |
| "loss": 0.7927, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.020657481738212323, |
| "grad_norm": 23.509214401245117, |
| "learning_rate": 2.3764842614654632e-06, |
| "loss": 0.7256, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.02295275748690258, |
| "grad_norm": 25.001569747924805, |
| "learning_rate": 2.418400955810514e-06, |
| "loss": 0.6645, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.02524803323559284, |
| "grad_norm": 17.22730827331543, |
| "learning_rate": 2.4562492431761594e-06, |
| "loss": 0.6266, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0275433089842831, |
| "grad_norm": 27.04228973388672, |
| "learning_rate": 2.490749474698331e-06, |
| "loss": 0.6193, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.029838584732973357, |
| "grad_norm": 29.859375, |
| "learning_rate": 2.5224461471278787e-06, |
| "loss": 0.6038, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.03213386048166362, |
| "grad_norm": 23.304059982299805, |
| "learning_rate": 2.5517608604098523e-06, |
| "loss": 0.5889, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.034429136230353874, |
| "grad_norm": 34.74980163574219, |
| "learning_rate": 2.5790267286571216e-06, |
| "loss": 0.5936, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.03672441197904413, |
| "grad_norm": 25.56021499633789, |
| "learning_rate": 2.6045115312613743e-06, |
| "loss": 0.5748, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.03901968772773439, |
| "grad_norm": 12.490568161010742, |
| "learning_rate": 2.6284337551622617e-06, |
| "loss": 0.558, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.041314963476424646, |
| "grad_norm": 17.215822219848633, |
| "learning_rate": 2.6509739968294195e-06, |
| "loss": 0.5535, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.04361023922511491, |
| "grad_norm": 21.178848266601562, |
| "learning_rate": 2.6722832469155484e-06, |
| "loss": 0.5271, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.04590551497380516, |
| "grad_norm": 14.22229290008545, |
| "learning_rate": 2.6924890275135524e-06, |
| "loss": 0.5421, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.048200790722495425, |
| "grad_norm": 77.64906311035156, |
| "learning_rate": 2.711700017156508e-06, |
| "loss": 0.5329, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.05049606647118568, |
| "grad_norm": 25.775665283203125, |
| "learning_rate": 2.73000958979208e-06, |
| "loss": 0.5169, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.05279134221987594, |
| "grad_norm": 16.82468032836914, |
| "learning_rate": 2.7471563517988443e-06, |
| "loss": 0.4989, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0550866179685662, |
| "grad_norm": 20.88793182373047, |
| "learning_rate": 2.7639095036115487e-06, |
| "loss": 0.5076, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.05738189371725646, |
| "grad_norm": 15.96468448638916, |
| "learning_rate": 2.779973026465106e-06, |
| "loss": 0.4996, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.059677169465946714, |
| "grad_norm": 29.40988540649414, |
| "learning_rate": 2.7954014586854842e-06, |
| "loss": 0.523, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.061972445214636976, |
| "grad_norm": 19.3817081451416, |
| "learning_rate": 2.810243114309793e-06, |
| "loss": 0.4857, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.06426772096332724, |
| "grad_norm": 30.938968658447266, |
| "learning_rate": 2.824540995622056e-06, |
| "loss": 0.4948, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.06656299671201749, |
| "grad_norm": 26.087671279907227, |
| "learning_rate": 2.83833354435266e-06, |
| "loss": 0.4907, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.06885827246070775, |
| "grad_norm": 16.257780075073242, |
| "learning_rate": 2.8516552646048146e-06, |
| "loss": 0.4792, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.07115354820939801, |
| "grad_norm": 20.534271240234375, |
| "learning_rate": 2.8645372429265973e-06, |
| "loss": 0.4889, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.07344882395808826, |
| "grad_norm": 13.257925987243652, |
| "learning_rate": 2.877007585258154e-06, |
| "loss": 0.4745, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.07574409970677852, |
| "grad_norm": 14.961082458496094, |
| "learning_rate": 2.889091786204755e-06, |
| "loss": 0.4922, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.07803937545546878, |
| "grad_norm": 15.788146018981934, |
| "learning_rate": 2.90081304283633e-06, |
| "loss": 0.4781, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.08033465120415904, |
| "grad_norm": 15.50967025756836, |
| "learning_rate": 2.912192522722599e-06, |
| "loss": 0.4875, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.08262992695284929, |
| "grad_norm": 13.156537055969238, |
| "learning_rate": 2.9232495939864444e-06, |
| "loss": 0.4703, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.08492520270153955, |
| "grad_norm": 32.00178909301758, |
| "learning_rate": 2.9340020236565454e-06, |
| "loss": 0.4702, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.08722047845022982, |
| "grad_norm": 19.58460807800293, |
| "learning_rate": 2.9444661494209185e-06, |
| "loss": 0.4938, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.08951575419892008, |
| "grad_norm": 40.492218017578125, |
| "learning_rate": 2.954657028950126e-06, |
| "loss": 0.4523, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.09181102994761033, |
| "grad_norm": 84.74713897705078, |
| "learning_rate": 2.9645885702160444e-06, |
| "loss": 0.4712, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.09410630569630059, |
| "grad_norm": 26.168777465820312, |
| "learning_rate": 2.9742736456367274e-06, |
| "loss": 0.4397, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.09640158144499085, |
| "grad_norm": 22.866653442382812, |
| "learning_rate": 2.9837241923979606e-06, |
| "loss": 0.4404, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.09869685719368111, |
| "grad_norm": 20.17527198791504, |
| "learning_rate": 2.992951300912944e-06, |
| "loss": 0.4592, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.10099213294237136, |
| "grad_norm": 24.15152359008789, |
| "learning_rate": 2.998469700061212e-06, |
| "loss": 0.4293, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.10328740869106162, |
| "grad_norm": 35.48339080810547, |
| "learning_rate": 2.990818200367272e-06, |
| "loss": 0.447, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.10558268443975188, |
| "grad_norm": 19.331541061401367, |
| "learning_rate": 2.9831667006733323e-06, |
| "loss": 0.4335, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.10787796018844215, |
| "grad_norm": 16.164037704467773, |
| "learning_rate": 2.975515200979392e-06, |
| "loss": 0.4428, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.1101732359371324, |
| "grad_norm": 14.07393741607666, |
| "learning_rate": 2.967863701285452e-06, |
| "loss": 0.4337, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.11246851168582266, |
| "grad_norm": 18.08838653564453, |
| "learning_rate": 2.960212201591512e-06, |
| "loss": 0.4366, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.11476378743451292, |
| "grad_norm": 20.341272354125977, |
| "learning_rate": 2.952560701897572e-06, |
| "loss": 0.4624, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.11705906318320317, |
| "grad_norm": 17.554807662963867, |
| "learning_rate": 2.944909202203632e-06, |
| "loss": 0.4517, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.11935433893189343, |
| "grad_norm": 12.912613868713379, |
| "learning_rate": 2.9372577025096922e-06, |
| "loss": 0.4191, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.12164961468058369, |
| "grad_norm": 11.591246604919434, |
| "learning_rate": 2.9296062028157517e-06, |
| "loss": 0.4577, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.12394489042927395, |
| "grad_norm": 12.689074516296387, |
| "learning_rate": 2.921954703121812e-06, |
| "loss": 0.4291, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.1262401661779642, |
| "grad_norm": 12.471826553344727, |
| "learning_rate": 2.914303203427872e-06, |
| "loss": 0.4528, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.12853544192665448, |
| "grad_norm": 12.39678955078125, |
| "learning_rate": 2.906804733727811e-06, |
| "loss": 0.4328, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.1308307176753447, |
| "grad_norm": 15.050296783447266, |
| "learning_rate": 2.8991532340338704e-06, |
| "loss": 0.4258, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.13312599342403497, |
| "grad_norm": 18.09667205810547, |
| "learning_rate": 2.8915017343399307e-06, |
| "loss": 0.4313, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.13542126917272523, |
| "grad_norm": 11.64979362487793, |
| "learning_rate": 2.8838502346459907e-06, |
| "loss": 0.4306, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.1377165449214155, |
| "grad_norm": 13.034558296203613, |
| "learning_rate": 2.8761987349520506e-06, |
| "loss": 0.4337, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.14001182067010576, |
| "grad_norm": 18.609317779541016, |
| "learning_rate": 2.868547235258111e-06, |
| "loss": 0.4308, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.14230709641879602, |
| "grad_norm": 12.441306114196777, |
| "learning_rate": 2.8608957355641704e-06, |
| "loss": 0.4267, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.14460237216748628, |
| "grad_norm": 20.22740364074707, |
| "learning_rate": 2.8532442358702308e-06, |
| "loss": 0.4229, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.14689764791617652, |
| "grad_norm": 14.57115650177002, |
| "learning_rate": 2.8455927361762907e-06, |
| "loss": 0.4196, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.14919292366486678, |
| "grad_norm": 15.352262496948242, |
| "learning_rate": 2.8379412364823506e-06, |
| "loss": 0.4367, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.15148819941355704, |
| "grad_norm": 36.657840728759766, |
| "learning_rate": 2.8302897367884105e-06, |
| "loss": 0.4529, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.1537834751622473, |
| "grad_norm": 14.538572311401367, |
| "learning_rate": 2.822638237094471e-06, |
| "loss": 0.4098, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.15607875091093756, |
| "grad_norm": 14.28237533569336, |
| "learning_rate": 2.8149867374005303e-06, |
| "loss": 0.4128, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.15837402665962783, |
| "grad_norm": 13.038817405700684, |
| "learning_rate": 2.8073352377065907e-06, |
| "loss": 0.4396, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.1606693024083181, |
| "grad_norm": 12.356093406677246, |
| "learning_rate": 2.799836768006529e-06, |
| "loss": 0.4445, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.16296457815700835, |
| "grad_norm": 11.58459186553955, |
| "learning_rate": 2.7921852683125895e-06, |
| "loss": 0.4383, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.16525985390569858, |
| "grad_norm": 13.039240837097168, |
| "learning_rate": 2.784533768618649e-06, |
| "loss": 0.4109, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.16755512965438885, |
| "grad_norm": 34.44761276245117, |
| "learning_rate": 2.7768822689247094e-06, |
| "loss": 0.427, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.1698504054030791, |
| "grad_norm": 17.821250915527344, |
| "learning_rate": 2.7692307692307693e-06, |
| "loss": 0.4197, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.17214568115176937, |
| "grad_norm": 21.884822845458984, |
| "learning_rate": 2.761579269536829e-06, |
| "loss": 0.4071, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.17444095690045963, |
| "grad_norm": 18.419849395751953, |
| "learning_rate": 2.7539277698428895e-06, |
| "loss": 0.4531, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.1767362326491499, |
| "grad_norm": 13.88978099822998, |
| "learning_rate": 2.746276270148949e-06, |
| "loss": 0.4231, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.17903150839784016, |
| "grad_norm": 18.00520133972168, |
| "learning_rate": 2.7386247704550094e-06, |
| "loss": 0.4307, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.18132678414653042, |
| "grad_norm": 11.312788963317871, |
| "learning_rate": 2.7309732707610693e-06, |
| "loss": 0.4236, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.18362205989522065, |
| "grad_norm": 18.427597045898438, |
| "learning_rate": 2.723321771067129e-06, |
| "loss": 0.4188, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1859173356439109, |
| "grad_norm": 15.946565628051758, |
| "learning_rate": 2.715670271373189e-06, |
| "loss": 0.4294, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.18821261139260118, |
| "grad_norm": 10.010071754455566, |
| "learning_rate": 2.7080187716792494e-06, |
| "loss": 0.4069, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.19050788714129144, |
| "grad_norm": 14.598346710205078, |
| "learning_rate": 2.700367271985309e-06, |
| "loss": 0.4198, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.1928031628899817, |
| "grad_norm": 14.54223918914795, |
| "learning_rate": 2.6927157722913693e-06, |
| "loss": 0.4051, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.19509843863867196, |
| "grad_norm": 22.899091720581055, |
| "learning_rate": 2.685064272597429e-06, |
| "loss": 0.418, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.19739371438736222, |
| "grad_norm": 11.836400032043457, |
| "learning_rate": 2.677412772903489e-06, |
| "loss": 0.4164, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.19968899013605246, |
| "grad_norm": 10.089373588562012, |
| "learning_rate": 2.669761273209549e-06, |
| "loss": 0.4088, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.20198426588474272, |
| "grad_norm": 13.1040620803833, |
| "learning_rate": 2.662109773515609e-06, |
| "loss": 0.4104, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.20427954163343298, |
| "grad_norm": 19.278318405151367, |
| "learning_rate": 2.6544582738216693e-06, |
| "loss": 0.4109, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.20657481738212324, |
| "grad_norm": 22.169904708862305, |
| "learning_rate": 2.646806774127729e-06, |
| "loss": 0.4067, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2088700931308135, |
| "grad_norm": 39.922847747802734, |
| "learning_rate": 2.639155274433789e-06, |
| "loss": 0.4035, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.21116536887950377, |
| "grad_norm": 13.038665771484375, |
| "learning_rate": 2.631503774739849e-06, |
| "loss": 0.4208, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.21346064462819403, |
| "grad_norm": 8.684808731079102, |
| "learning_rate": 2.6238522750459094e-06, |
| "loss": 0.4071, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.2157559203768843, |
| "grad_norm": 19.35813331604004, |
| "learning_rate": 2.616200775351969e-06, |
| "loss": 0.415, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.21805119612557453, |
| "grad_norm": 13.930493354797363, |
| "learning_rate": 2.608549275658029e-06, |
| "loss": 0.4126, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.2203464718742648, |
| "grad_norm": 11.336668968200684, |
| "learning_rate": 2.600897775964089e-06, |
| "loss": 0.4006, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.22264174762295505, |
| "grad_norm": 11.625572204589844, |
| "learning_rate": 2.593246276270149e-06, |
| "loss": 0.4016, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.2249370233716453, |
| "grad_norm": 13.675556182861328, |
| "learning_rate": 2.585594776576209e-06, |
| "loss": 0.4002, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.22723229912033557, |
| "grad_norm": 10.017621994018555, |
| "learning_rate": 2.577943276882269e-06, |
| "loss": 0.3831, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.22952757486902584, |
| "grad_norm": 9.974747657775879, |
| "learning_rate": 2.570444807182208e-06, |
| "loss": 0.4271, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2318228506177161, |
| "grad_norm": 17.395328521728516, |
| "learning_rate": 2.5627933074882677e-06, |
| "loss": 0.4088, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.23411812636640633, |
| "grad_norm": 11.723817825317383, |
| "learning_rate": 2.5551418077943276e-06, |
| "loss": 0.3957, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.2364134021150966, |
| "grad_norm": 11.623498916625977, |
| "learning_rate": 2.5474903081003875e-06, |
| "loss": 0.4107, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.23870867786378686, |
| "grad_norm": 18.517606735229492, |
| "learning_rate": 2.539838808406448e-06, |
| "loss": 0.4228, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.24100395361247712, |
| "grad_norm": 16.989261627197266, |
| "learning_rate": 2.532187308712508e-06, |
| "loss": 0.4057, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.24329922936116738, |
| "grad_norm": 14.439526557922363, |
| "learning_rate": 2.5245358090185677e-06, |
| "loss": 0.4152, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.24559450510985764, |
| "grad_norm": 17.96590805053711, |
| "learning_rate": 2.5168843093246276e-06, |
| "loss": 0.412, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.2478897808585479, |
| "grad_norm": 18.715192794799805, |
| "learning_rate": 2.509232809630688e-06, |
| "loss": 0.3926, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.25018505660723817, |
| "grad_norm": 9.67862606048584, |
| "learning_rate": 2.5015813099367475e-06, |
| "loss": 0.3897, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.2524803323559284, |
| "grad_norm": 10.955434799194336, |
| "learning_rate": 2.493929810242808e-06, |
| "loss": 0.4127, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.2547756081046187, |
| "grad_norm": 13.451887130737305, |
| "learning_rate": 2.4862783105488677e-06, |
| "loss": 0.4027, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.25707088385330895, |
| "grad_norm": 17.29932403564453, |
| "learning_rate": 2.4786268108549276e-06, |
| "loss": 0.3976, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.25936615960199916, |
| "grad_norm": 9.819147109985352, |
| "learning_rate": 2.4709753111609876e-06, |
| "loss": 0.3667, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.2616614353506894, |
| "grad_norm": 25.263425827026367, |
| "learning_rate": 2.4633238114670475e-06, |
| "loss": 0.3816, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.2639567110993797, |
| "grad_norm": 12.653718948364258, |
| "learning_rate": 2.4556723117731074e-06, |
| "loss": 0.4104, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.26625198684806994, |
| "grad_norm": 25.030275344848633, |
| "learning_rate": 2.4480208120791677e-06, |
| "loss": 0.3916, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.2685472625967602, |
| "grad_norm": 11.694486618041992, |
| "learning_rate": 2.4403693123852272e-06, |
| "loss": 0.3901, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.27084253834545047, |
| "grad_norm": 13.822489738464355, |
| "learning_rate": 2.4327178126912876e-06, |
| "loss": 0.3861, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.27313781409414073, |
| "grad_norm": 13.548593521118164, |
| "learning_rate": 2.4250663129973475e-06, |
| "loss": 0.4043, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.275433089842831, |
| "grad_norm": 16.44365119934082, |
| "learning_rate": 2.4174148133034074e-06, |
| "loss": 0.3927, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.27772836559152125, |
| "grad_norm": 23.707990646362305, |
| "learning_rate": 2.4099163436033463e-06, |
| "loss": 0.3864, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.2800236413402115, |
| "grad_norm": 14.827530860900879, |
| "learning_rate": 2.4022648439094062e-06, |
| "loss": 0.3904, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.2823189170889018, |
| "grad_norm": 21.141281127929688, |
| "learning_rate": 2.394766374209345e-06, |
| "loss": 0.4152, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.28461419283759204, |
| "grad_norm": 20.442838668823242, |
| "learning_rate": 2.387114874515405e-06, |
| "loss": 0.4072, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2869094685862823, |
| "grad_norm": 10.607434272766113, |
| "learning_rate": 2.379463374821465e-06, |
| "loss": 0.4083, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.28920474433497256, |
| "grad_norm": 15.464150428771973, |
| "learning_rate": 2.371811875127525e-06, |
| "loss": 0.3963, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.2915000200836628, |
| "grad_norm": 11.382309913635254, |
| "learning_rate": 2.3641603754335853e-06, |
| "loss": 0.3966, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.29379529583235303, |
| "grad_norm": 13.88147258758545, |
| "learning_rate": 2.3565088757396448e-06, |
| "loss": 0.3961, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.2960905715810433, |
| "grad_norm": 11.285943031311035, |
| "learning_rate": 2.348857376045705e-06, |
| "loss": 0.3667, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.29838584732973356, |
| "grad_norm": 13.869823455810547, |
| "learning_rate": 2.341205876351765e-06, |
| "loss": 0.4129, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.3006811230784238, |
| "grad_norm": 31.149734497070312, |
| "learning_rate": 2.333554376657825e-06, |
| "loss": 0.4148, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.3029763988271141, |
| "grad_norm": 125.76400756835938, |
| "learning_rate": 2.325902876963885e-06, |
| "loss": 0.3898, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.30527167457580434, |
| "grad_norm": 12.42544937133789, |
| "learning_rate": 2.3182513772699448e-06, |
| "loss": 0.406, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.3075669503244946, |
| "grad_norm": 17.587514877319336, |
| "learning_rate": 2.310599877576005e-06, |
| "loss": 0.3945, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.30986222607318487, |
| "grad_norm": 10.764501571655273, |
| "learning_rate": 2.302948377882065e-06, |
| "loss": 0.3842, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.3121575018218751, |
| "grad_norm": 9.208351135253906, |
| "learning_rate": 2.295296878188125e-06, |
| "loss": 0.3923, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3144527775705654, |
| "grad_norm": 11.656021118164062, |
| "learning_rate": 2.287645378494185e-06, |
| "loss": 0.3886, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.31674805331925565, |
| "grad_norm": 12.415077209472656, |
| "learning_rate": 2.279993878800245e-06, |
| "loss": 0.3739, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3190433290679459, |
| "grad_norm": 9.68492317199707, |
| "learning_rate": 2.2723423791063047e-06, |
| "loss": 0.3891, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.3213386048166362, |
| "grad_norm": 10.831034660339355, |
| "learning_rate": 2.264690879412365e-06, |
| "loss": 0.402, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.32363388056532644, |
| "grad_norm": 15.833775520324707, |
| "learning_rate": 2.257039379718425e-06, |
| "loss": 0.3936, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.3259291563140167, |
| "grad_norm": 20.576457977294922, |
| "learning_rate": 2.249387880024485e-06, |
| "loss": 0.3855, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.32822443206270696, |
| "grad_norm": 17.22242546081543, |
| "learning_rate": 2.2417363803305448e-06, |
| "loss": 0.4012, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.33051970781139717, |
| "grad_norm": 9.966532707214355, |
| "learning_rate": 2.2340848806366047e-06, |
| "loss": 0.3984, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.33281498356008743, |
| "grad_norm": 7.574636936187744, |
| "learning_rate": 2.2264333809426646e-06, |
| "loss": 0.38, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.3351102593087777, |
| "grad_norm": 11.461008071899414, |
| "learning_rate": 2.218781881248725e-06, |
| "loss": 0.3902, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.33740553505746795, |
| "grad_norm": 17.567983627319336, |
| "learning_rate": 2.2111303815547844e-06, |
| "loss": 0.3944, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.3397008108061582, |
| "grad_norm": 11.135239601135254, |
| "learning_rate": 2.2036319118547234e-06, |
| "loss": 0.3916, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.3419960865548485, |
| "grad_norm": 9.449058532714844, |
| "learning_rate": 2.1959804121607837e-06, |
| "loss": 0.3929, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.34429136230353874, |
| "grad_norm": 8.860933303833008, |
| "learning_rate": 2.1883289124668436e-06, |
| "loss": 0.3831, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.346586638052229, |
| "grad_norm": 15.684256553649902, |
| "learning_rate": 2.1806774127729035e-06, |
| "loss": 0.3954, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.34888191380091926, |
| "grad_norm": 9.633450508117676, |
| "learning_rate": 2.1730259130789634e-06, |
| "loss": 0.3965, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.3511771895496095, |
| "grad_norm": 9.775280952453613, |
| "learning_rate": 2.165374413385024e-06, |
| "loss": 0.3858, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.3534724652982998, |
| "grad_norm": 18.91486930847168, |
| "learning_rate": 2.1577229136910833e-06, |
| "loss": 0.4009, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.35576774104699005, |
| "grad_norm": 9.630500793457031, |
| "learning_rate": 2.1500714139971436e-06, |
| "loss": 0.4133, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.3580630167956803, |
| "grad_norm": 10.628037452697754, |
| "learning_rate": 2.1424199143032035e-06, |
| "loss": 0.3597, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.3603582925443706, |
| "grad_norm": 12.760895729064941, |
| "learning_rate": 2.1347684146092635e-06, |
| "loss": 0.3967, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.36265356829306084, |
| "grad_norm": 20.6715145111084, |
| "learning_rate": 2.1271169149153234e-06, |
| "loss": 0.384, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.36494884404175104, |
| "grad_norm": 9.833721160888672, |
| "learning_rate": 2.1196184452152623e-06, |
| "loss": 0.3994, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.3672441197904413, |
| "grad_norm": 11.794584274291992, |
| "learning_rate": 2.1119669455213222e-06, |
| "loss": 0.3819, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.36953939553913157, |
| "grad_norm": 11.88609790802002, |
| "learning_rate": 2.104315445827382e-06, |
| "loss": 0.3848, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.3718346712878218, |
| "grad_norm": 18.272483825683594, |
| "learning_rate": 2.0966639461334425e-06, |
| "loss": 0.3828, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.3741299470365121, |
| "grad_norm": 12.808188438415527, |
| "learning_rate": 2.089012446439502e-06, |
| "loss": 0.3771, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.37642522278520235, |
| "grad_norm": 17.478059768676758, |
| "learning_rate": 2.0813609467455623e-06, |
| "loss": 0.4058, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.3787204985338926, |
| "grad_norm": 9.85326099395752, |
| "learning_rate": 2.0737094470516222e-06, |
| "loss": 0.3605, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.3810157742825829, |
| "grad_norm": 11.933965682983398, |
| "learning_rate": 2.066057947357682e-06, |
| "loss": 0.3979, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.38331105003127314, |
| "grad_norm": 14.13598346710205, |
| "learning_rate": 2.058406447663742e-06, |
| "loss": 0.372, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.3856063257799634, |
| "grad_norm": 18.427085876464844, |
| "learning_rate": 2.0507549479698024e-06, |
| "loss": 0.382, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.38790160152865366, |
| "grad_norm": 11.309691429138184, |
| "learning_rate": 2.043103448275862e-06, |
| "loss": 0.3965, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.3901968772773439, |
| "grad_norm": 26.351280212402344, |
| "learning_rate": 2.0354519485819222e-06, |
| "loss": 0.3794, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.3924921530260342, |
| "grad_norm": 39.77188491821289, |
| "learning_rate": 2.027800448887982e-06, |
| "loss": 0.3715, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.39478742877472445, |
| "grad_norm": 11.656793594360352, |
| "learning_rate": 2.020148949194042e-06, |
| "loss": 0.3724, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.3970827045234147, |
| "grad_norm": 10.332427978515625, |
| "learning_rate": 2.012497449500102e-06, |
| "loss": 0.3913, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.3993779802721049, |
| "grad_norm": 10.971294403076172, |
| "learning_rate": 2.004845949806162e-06, |
| "loss": 0.3852, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.4016732560207952, |
| "grad_norm": 11.635868072509766, |
| "learning_rate": 1.997194450112222e-06, |
| "loss": 0.3918, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.40396853176948544, |
| "grad_norm": 14.208995819091797, |
| "learning_rate": 1.989542950418282e-06, |
| "loss": 0.38, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.4062638075181757, |
| "grad_norm": 14.743267059326172, |
| "learning_rate": 1.981891450724342e-06, |
| "loss": 0.378, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.40855908326686596, |
| "grad_norm": 11.725529670715332, |
| "learning_rate": 1.974239951030402e-06, |
| "loss": 0.3592, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.4108543590155562, |
| "grad_norm": 9.404533386230469, |
| "learning_rate": 1.966741481330341e-06, |
| "loss": 0.4008, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.4131496347642465, |
| "grad_norm": 10.354850769042969, |
| "learning_rate": 1.959089981636401e-06, |
| "loss": 0.3891, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.41544491051293675, |
| "grad_norm": 10.396027565002441, |
| "learning_rate": 1.9515915119363398e-06, |
| "loss": 0.3756, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.417740186261627, |
| "grad_norm": 14.872404098510742, |
| "learning_rate": 1.9439400122423997e-06, |
| "loss": 0.3672, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.4200354620103173, |
| "grad_norm": 12.267988204956055, |
| "learning_rate": 1.9362885125484596e-06, |
| "loss": 0.3694, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.42233073775900754, |
| "grad_norm": 10.50540828704834, |
| "learning_rate": 1.9286370128545195e-06, |
| "loss": 0.3746, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.4246260135076978, |
| "grad_norm": 9.12032699584961, |
| "learning_rate": 1.9209855131605794e-06, |
| "loss": 0.3739, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.42692128925638806, |
| "grad_norm": 19.14887046813965, |
| "learning_rate": 1.9133340134666393e-06, |
| "loss": 0.3725, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.4292165650050783, |
| "grad_norm": 12.185490608215332, |
| "learning_rate": 1.9056825137726995e-06, |
| "loss": 0.3742, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.4315118407537686, |
| "grad_norm": 6.92899751663208, |
| "learning_rate": 1.8980310140787594e-06, |
| "loss": 0.3818, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.4338071165024588, |
| "grad_norm": 11.001760482788086, |
| "learning_rate": 1.8905325443786983e-06, |
| "loss": 0.3744, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.43610239225114905, |
| "grad_norm": 14.638999938964844, |
| "learning_rate": 1.8828810446847584e-06, |
| "loss": 0.3873, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.4383976679998393, |
| "grad_norm": 11.44972038269043, |
| "learning_rate": 1.8752295449908181e-06, |
| "loss": 0.3743, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.4406929437485296, |
| "grad_norm": 10.270658493041992, |
| "learning_rate": 1.8675780452968783e-06, |
| "loss": 0.4017, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.44298821949721984, |
| "grad_norm": 55.21454620361328, |
| "learning_rate": 1.8599265456029382e-06, |
| "loss": 0.3776, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.4452834952459101, |
| "grad_norm": 16.605663299560547, |
| "learning_rate": 1.8522750459089983e-06, |
| "loss": 0.3807, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.44757877099460036, |
| "grad_norm": 10.325900077819824, |
| "learning_rate": 1.8446235462150582e-06, |
| "loss": 0.3842, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.4498740467432906, |
| "grad_norm": 11.08858585357666, |
| "learning_rate": 1.8369720465211184e-06, |
| "loss": 0.3913, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.4521693224919809, |
| "grad_norm": 11.547320365905762, |
| "learning_rate": 1.829320546827178e-06, |
| "loss": 0.3857, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.45446459824067115, |
| "grad_norm": 7.3234171867370605, |
| "learning_rate": 1.8216690471332382e-06, |
| "loss": 0.3591, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.4567598739893614, |
| "grad_norm": 15.973671913146973, |
| "learning_rate": 1.8140175474392981e-06, |
| "loss": 0.3862, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.45905514973805167, |
| "grad_norm": 9.876672744750977, |
| "learning_rate": 1.8063660477453582e-06, |
| "loss": 0.3736, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.46135042548674193, |
| "grad_norm": 10.364340782165527, |
| "learning_rate": 1.798714548051418e-06, |
| "loss": 0.3753, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.4636457012354322, |
| "grad_norm": 12.696479797363281, |
| "learning_rate": 1.791063048357478e-06, |
| "loss": 0.3925, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.46594097698412246, |
| "grad_norm": 9.908900260925293, |
| "learning_rate": 1.783411548663538e-06, |
| "loss": 0.3909, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.46823625273281266, |
| "grad_norm": 9.967103958129883, |
| "learning_rate": 1.7757600489695981e-06, |
| "loss": 0.3645, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.4705315284815029, |
| "grad_norm": 18.3125057220459, |
| "learning_rate": 1.7681085492756582e-06, |
| "loss": 0.3909, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.4728268042301932, |
| "grad_norm": 9.652514457702637, |
| "learning_rate": 1.760457049581718e-06, |
| "loss": 0.3677, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.47512207997888345, |
| "grad_norm": 11.558232307434082, |
| "learning_rate": 1.7528055498877783e-06, |
| "loss": 0.3898, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.4774173557275737, |
| "grad_norm": 12.501564025878906, |
| "learning_rate": 1.745154050193838e-06, |
| "loss": 0.3712, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.479712631476264, |
| "grad_norm": 16.78426742553711, |
| "learning_rate": 1.7375025504998981e-06, |
| "loss": 0.3563, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.48200790722495424, |
| "grad_norm": 17.035160064697266, |
| "learning_rate": 1.729851050805958e-06, |
| "loss": 0.3754, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.4843031829736445, |
| "grad_norm": 10.664167404174805, |
| "learning_rate": 1.7221995511120182e-06, |
| "loss": 0.396, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.48659845872233476, |
| "grad_norm": 11.713300704956055, |
| "learning_rate": 1.7145480514180779e-06, |
| "loss": 0.3612, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.488893734471025, |
| "grad_norm": 10.911885261535645, |
| "learning_rate": 1.706896551724138e-06, |
| "loss": 0.3943, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.4911890102197153, |
| "grad_norm": 20.872018814086914, |
| "learning_rate": 1.699245052030198e-06, |
| "loss": 0.3781, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.49348428596840554, |
| "grad_norm": 10.593243598937988, |
| "learning_rate": 1.691593552336258e-06, |
| "loss": 0.3885, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.4957795617170958, |
| "grad_norm": 14.705113410949707, |
| "learning_rate": 1.6839420526423177e-06, |
| "loss": 0.364, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.49807483746578607, |
| "grad_norm": 10.650996208190918, |
| "learning_rate": 1.6762905529483779e-06, |
| "loss": 0.3723, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.5003701132144763, |
| "grad_norm": 16.529132843017578, |
| "learning_rate": 1.6686390532544378e-06, |
| "loss": 0.3663, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.5026653889631666, |
| "grad_norm": 13.437551498413086, |
| "learning_rate": 1.660987553560498e-06, |
| "loss": 0.3704, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.5049606647118569, |
| "grad_norm": 9.07573127746582, |
| "learning_rate": 1.6533360538665578e-06, |
| "loss": 0.3767, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5072559404605471, |
| "grad_norm": 29.058597564697266, |
| "learning_rate": 1.645684554172618e-06, |
| "loss": 0.3693, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.5095512162092374, |
| "grad_norm": 9.73399543762207, |
| "learning_rate": 1.638033054478678e-06, |
| "loss": 0.3755, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.5118464919579276, |
| "grad_norm": 19.876361846923828, |
| "learning_rate": 1.6303815547847378e-06, |
| "loss": 0.3807, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.5141417677066179, |
| "grad_norm": 12.44278621673584, |
| "learning_rate": 1.622730055090798e-06, |
| "loss": 0.3732, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.5164370434553082, |
| "grad_norm": 10.846384048461914, |
| "learning_rate": 1.6150785553968578e-06, |
| "loss": 0.3786, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.5187323192039983, |
| "grad_norm": 14.698681831359863, |
| "learning_rate": 1.607427055702918e-06, |
| "loss": 0.3792, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.5210275949526886, |
| "grad_norm": 10.92941951751709, |
| "learning_rate": 1.5997755560089777e-06, |
| "loss": 0.3849, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.5233228707013788, |
| "grad_norm": 12.259003639221191, |
| "learning_rate": 1.5921240563150378e-06, |
| "loss": 0.3764, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.5256181464500691, |
| "grad_norm": 12.608525276184082, |
| "learning_rate": 1.5844725566210977e-06, |
| "loss": 0.381, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.5279134221987594, |
| "grad_norm": 14.017390251159668, |
| "learning_rate": 1.5768210569271578e-06, |
| "loss": 0.3761, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.5302086979474496, |
| "grad_norm": 11.806578636169434, |
| "learning_rate": 1.5691695572332175e-06, |
| "loss": 0.3738, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.5325039736961399, |
| "grad_norm": 14.099353790283203, |
| "learning_rate": 1.5615180575392779e-06, |
| "loss": 0.3631, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.5347992494448301, |
| "grad_norm": 12.497589111328125, |
| "learning_rate": 1.5538665578453376e-06, |
| "loss": 0.3861, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.5370945251935204, |
| "grad_norm": 12.558547973632812, |
| "learning_rate": 1.5462150581513977e-06, |
| "loss": 0.3632, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.5393898009422107, |
| "grad_norm": 30.996755599975586, |
| "learning_rate": 1.5385635584574576e-06, |
| "loss": 0.3793, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.5416850766909009, |
| "grad_norm": 7.529469013214111, |
| "learning_rate": 1.5309120587635178e-06, |
| "loss": 0.3503, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.5439803524395912, |
| "grad_norm": 9.519927978515625, |
| "learning_rate": 1.5232605590695779e-06, |
| "loss": 0.374, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.5462756281882815, |
| "grad_norm": 16.44211769104004, |
| "learning_rate": 1.5156090593756376e-06, |
| "loss": 0.3647, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.5485709039369717, |
| "grad_norm": 30.076602935791016, |
| "learning_rate": 1.5079575596816977e-06, |
| "loss": 0.3795, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.550866179685662, |
| "grad_norm": 12.661836624145508, |
| "learning_rate": 1.5003060599877576e-06, |
| "loss": 0.3615, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5531614554343522, |
| "grad_norm": 11.008061408996582, |
| "learning_rate": 1.4926545602938176e-06, |
| "loss": 0.3898, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.5554567311830425, |
| "grad_norm": 8.93970775604248, |
| "learning_rate": 1.4851560905937563e-06, |
| "loss": 0.3768, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.5577520069317328, |
| "grad_norm": 11.853392601013184, |
| "learning_rate": 1.4775045908998164e-06, |
| "loss": 0.3755, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.560047282680423, |
| "grad_norm": 13.085156440734863, |
| "learning_rate": 1.4698530912058765e-06, |
| "loss": 0.3617, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.5623425584291133, |
| "grad_norm": 10.487837791442871, |
| "learning_rate": 1.4622015915119364e-06, |
| "loss": 0.3806, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.5646378341778036, |
| "grad_norm": 9.621622085571289, |
| "learning_rate": 1.4545500918179964e-06, |
| "loss": 0.3789, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.5669331099264938, |
| "grad_norm": 18.29271125793457, |
| "learning_rate": 1.4468985921240565e-06, |
| "loss": 0.3809, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.5692283856751841, |
| "grad_norm": 15.107403755187988, |
| "learning_rate": 1.4392470924301164e-06, |
| "loss": 0.3847, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.5715236614238743, |
| "grad_norm": 9.128915786743164, |
| "learning_rate": 1.4315955927361763e-06, |
| "loss": 0.3718, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.5738189371725646, |
| "grad_norm": 10.191695213317871, |
| "learning_rate": 1.424097123036115e-06, |
| "loss": 0.3678, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.5761142129212549, |
| "grad_norm": 10.501177787780762, |
| "learning_rate": 1.4164456233421752e-06, |
| "loss": 0.3727, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.5784094886699451, |
| "grad_norm": 17.270280838012695, |
| "learning_rate": 1.408794123648235e-06, |
| "loss": 0.3725, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.5807047644186354, |
| "grad_norm": 11.970187187194824, |
| "learning_rate": 1.401142623954295e-06, |
| "loss": 0.3641, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.5830000401673257, |
| "grad_norm": 8.886324882507324, |
| "learning_rate": 1.393491124260355e-06, |
| "loss": 0.3663, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.5852953159160159, |
| "grad_norm": 10.107802391052246, |
| "learning_rate": 1.385839624566415e-06, |
| "loss": 0.3927, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.5875905916647061, |
| "grad_norm": 10.90116024017334, |
| "learning_rate": 1.378188124872475e-06, |
| "loss": 0.3745, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.5898858674133963, |
| "grad_norm": 27.726293563842773, |
| "learning_rate": 1.3705366251785349e-06, |
| "loss": 0.3691, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.5921811431620866, |
| "grad_norm": 14.674005508422852, |
| "learning_rate": 1.362885125484595e-06, |
| "loss": 0.3566, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.5944764189107768, |
| "grad_norm": 8.121840476989746, |
| "learning_rate": 1.3552336257906551e-06, |
| "loss": 0.3711, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.5967716946594671, |
| "grad_norm": 30.958778381347656, |
| "learning_rate": 1.347582126096715e-06, |
| "loss": 0.371, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.5990669704081574, |
| "grad_norm": 7.38535213470459, |
| "learning_rate": 1.339930626402775e-06, |
| "loss": 0.3679, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.6013622461568476, |
| "grad_norm": 34.25715255737305, |
| "learning_rate": 1.332279126708835e-06, |
| "loss": 0.3897, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.6036575219055379, |
| "grad_norm": 13.392374992370605, |
| "learning_rate": 1.324627627014895e-06, |
| "loss": 0.3873, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.6059527976542282, |
| "grad_norm": 12.169110298156738, |
| "learning_rate": 1.316976127320955e-06, |
| "loss": 0.3533, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.6082480734029184, |
| "grad_norm": 10.7171049118042, |
| "learning_rate": 1.3093246276270148e-06, |
| "loss": 0.3814, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.6105433491516087, |
| "grad_norm": 9.975994110107422, |
| "learning_rate": 1.301673127933075e-06, |
| "loss": 0.3817, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.612838624900299, |
| "grad_norm": 9.311064720153809, |
| "learning_rate": 1.2940216282391349e-06, |
| "loss": 0.3602, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.6151339006489892, |
| "grad_norm": 6.753169536590576, |
| "learning_rate": 1.2863701285451948e-06, |
| "loss": 0.3739, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.6174291763976795, |
| "grad_norm": 14.21111011505127, |
| "learning_rate": 1.278718628851255e-06, |
| "loss": 0.3677, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.6197244521463697, |
| "grad_norm": 10.715375900268555, |
| "learning_rate": 1.2710671291573149e-06, |
| "loss": 0.3575, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.62201972789506, |
| "grad_norm": 18.988964080810547, |
| "learning_rate": 1.2635686594572536e-06, |
| "loss": 0.3678, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.6243150036437503, |
| "grad_norm": 11.761975288391113, |
| "learning_rate": 1.2559171597633135e-06, |
| "loss": 0.3552, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.6266102793924405, |
| "grad_norm": 15.087313652038574, |
| "learning_rate": 1.2484186900632524e-06, |
| "loss": 0.3811, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.6289055551411308, |
| "grad_norm": 10.330754280090332, |
| "learning_rate": 1.2407671903693125e-06, |
| "loss": 0.3772, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.631200830889821, |
| "grad_norm": 18.443931579589844, |
| "learning_rate": 1.2331156906753725e-06, |
| "loss": 0.3739, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.6334961066385113, |
| "grad_norm": 10.0236177444458, |
| "learning_rate": 1.2254641909814324e-06, |
| "loss": 0.3811, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.6357913823872016, |
| "grad_norm": 12.954588890075684, |
| "learning_rate": 1.2178126912874923e-06, |
| "loss": 0.3658, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.6380866581358918, |
| "grad_norm": 11.381658554077148, |
| "learning_rate": 1.2101611915935524e-06, |
| "loss": 0.3718, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.6403819338845821, |
| "grad_norm": 12.019516944885254, |
| "learning_rate": 1.2025096918996123e-06, |
| "loss": 0.3622, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.6426772096332724, |
| "grad_norm": 10.334041595458984, |
| "learning_rate": 1.1948581922056723e-06, |
| "loss": 0.3636, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.6449724853819626, |
| "grad_norm": 10.666448593139648, |
| "learning_rate": 1.1872066925117324e-06, |
| "loss": 0.3451, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.6472677611306529, |
| "grad_norm": 9.785908699035645, |
| "learning_rate": 1.1795551928177923e-06, |
| "loss": 0.364, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.6495630368793431, |
| "grad_norm": 17.802858352661133, |
| "learning_rate": 1.1719036931238522e-06, |
| "loss": 0.365, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.6518583126280334, |
| "grad_norm": 11.654463768005371, |
| "learning_rate": 1.1642521934299123e-06, |
| "loss": 0.3755, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.6541535883767237, |
| "grad_norm": 27.977380752563477, |
| "learning_rate": 1.1566006937359723e-06, |
| "loss": 0.3865, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.6564488641254139, |
| "grad_norm": 12.833617210388184, |
| "learning_rate": 1.1489491940420322e-06, |
| "loss": 0.3761, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.6587441398741041, |
| "grad_norm": 10.381138801574707, |
| "learning_rate": 1.1412976943480923e-06, |
| "loss": 0.3693, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.6610394156227943, |
| "grad_norm": 10.039835929870605, |
| "learning_rate": 1.1336461946541522e-06, |
| "loss": 0.3621, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.6633346913714846, |
| "grad_norm": 9.079568862915039, |
| "learning_rate": 1.1259946949602123e-06, |
| "loss": 0.385, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.6656299671201749, |
| "grad_norm": 9.107003211975098, |
| "learning_rate": 1.1183431952662723e-06, |
| "loss": 0.3798, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.6679252428688651, |
| "grad_norm": 10.977982521057129, |
| "learning_rate": 1.1106916955723322e-06, |
| "loss": 0.3721, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.6702205186175554, |
| "grad_norm": 14.997126579284668, |
| "learning_rate": 1.1030401958783923e-06, |
| "loss": 0.347, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.6725157943662456, |
| "grad_norm": 13.861760139465332, |
| "learning_rate": 1.0953886961844522e-06, |
| "loss": 0.3727, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.6748110701149359, |
| "grad_norm": 10.945677757263184, |
| "learning_rate": 1.0877371964905121e-06, |
| "loss": 0.3741, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.6771063458636262, |
| "grad_norm": 14.002128601074219, |
| "learning_rate": 1.080085696796572e-06, |
| "loss": 0.3698, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.6794016216123164, |
| "grad_norm": 17.263320922851562, |
| "learning_rate": 1.0724341971026322e-06, |
| "loss": 0.3614, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.6816968973610067, |
| "grad_norm": 12.30994987487793, |
| "learning_rate": 1.064782697408692e-06, |
| "loss": 0.3683, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.683992173109697, |
| "grad_norm": 9.857229232788086, |
| "learning_rate": 1.057131197714752e-06, |
| "loss": 0.3698, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.6862874488583872, |
| "grad_norm": 18.498348236083984, |
| "learning_rate": 1.0494796980208121e-06, |
| "loss": 0.3665, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.6885827246070775, |
| "grad_norm": 22.762800216674805, |
| "learning_rate": 1.041828198326872e-06, |
| "loss": 0.3656, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.6908780003557677, |
| "grad_norm": 9.595602035522461, |
| "learning_rate": 1.034176698632932e-06, |
| "loss": 0.3658, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.693173276104458, |
| "grad_norm": 9.138602256774902, |
| "learning_rate": 1.0265251989389919e-06, |
| "loss": 0.3459, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.6954685518531483, |
| "grad_norm": 21.768016815185547, |
| "learning_rate": 1.0188736992450522e-06, |
| "loss": 0.3768, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.6977638276018385, |
| "grad_norm": 16.048538208007812, |
| "learning_rate": 1.0112221995511121e-06, |
| "loss": 0.3578, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.7000591033505288, |
| "grad_norm": 18.636796951293945, |
| "learning_rate": 1.003570699857172e-06, |
| "loss": 0.3632, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.702354379099219, |
| "grad_norm": 21.801054000854492, |
| "learning_rate": 9.95919200163232e-07, |
| "loss": 0.3631, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.7046496548479093, |
| "grad_norm": 25.04891014099121, |
| "learning_rate": 9.882677004692921e-07, |
| "loss": 0.3558, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.7069449305965996, |
| "grad_norm": 10.539541244506836, |
| "learning_rate": 9.80616200775352e-07, |
| "loss": 0.3644, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.7092402063452898, |
| "grad_norm": 13.989766120910645, |
| "learning_rate": 9.72964701081412e-07, |
| "loss": 0.3618, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.7115354820939801, |
| "grad_norm": 10.638326644897461, |
| "learning_rate": 9.65313201387472e-07, |
| "loss": 0.3675, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.7138307578426704, |
| "grad_norm": 11.744914054870605, |
| "learning_rate": 9.57661701693532e-07, |
| "loss": 0.3657, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.7161260335913606, |
| "grad_norm": 10.490307807922363, |
| "learning_rate": 9.500102019995919e-07, |
| "loss": 0.3736, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.7184213093400509, |
| "grad_norm": 24.758380889892578, |
| "learning_rate": 9.425117322995307e-07, |
| "loss": 0.3817, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.7207165850887411, |
| "grad_norm": 13.692597389221191, |
| "learning_rate": 9.348602326055906e-07, |
| "loss": 0.3561, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.7230118608374314, |
| "grad_norm": 9.888838768005371, |
| "learning_rate": 9.272087329116507e-07, |
| "loss": 0.3581, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.7253071365861217, |
| "grad_norm": 12.952566146850586, |
| "learning_rate": 9.195572332177106e-07, |
| "loss": 0.3672, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.7276024123348118, |
| "grad_norm": 11.688536643981934, |
| "learning_rate": 9.120587635176495e-07, |
| "loss": 0.3766, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.7298976880835021, |
| "grad_norm": 17.879161834716797, |
| "learning_rate": 9.044072638237095e-07, |
| "loss": 0.3622, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.7321929638321923, |
| "grad_norm": 20.887718200683594, |
| "learning_rate": 8.967557641297696e-07, |
| "loss": 0.3649, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.7344882395808826, |
| "grad_norm": 9.507134437561035, |
| "learning_rate": 8.891042644358295e-07, |
| "loss": 0.3561, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.7367835153295729, |
| "grad_norm": 15.035599708557129, |
| "learning_rate": 8.814527647418895e-07, |
| "loss": 0.3428, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.7390787910782631, |
| "grad_norm": 18.98991584777832, |
| "learning_rate": 8.738012650479494e-07, |
| "loss": 0.3647, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.7413740668269534, |
| "grad_norm": 11.455354690551758, |
| "learning_rate": 8.661497653540094e-07, |
| "loss": 0.3546, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.7436693425756437, |
| "grad_norm": 10.941442489624023, |
| "learning_rate": 8.584982656600693e-07, |
| "loss": 0.3704, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.7459646183243339, |
| "grad_norm": 11.790033340454102, |
| "learning_rate": 8.508467659661294e-07, |
| "loss": 0.3589, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.7482598940730242, |
| "grad_norm": 7.833493232727051, |
| "learning_rate": 8.431952662721894e-07, |
| "loss": 0.3517, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.7505551698217144, |
| "grad_norm": 12.305365562438965, |
| "learning_rate": 8.355437665782493e-07, |
| "loss": 0.3595, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.7528504455704047, |
| "grad_norm": 14.888938903808594, |
| "learning_rate": 8.278922668843093e-07, |
| "loss": 0.3793, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.755145721319095, |
| "grad_norm": 9.640114784240723, |
| "learning_rate": 8.202407671903692e-07, |
| "loss": 0.3482, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.7574409970677852, |
| "grad_norm": 14.406376838684082, |
| "learning_rate": 8.125892674964293e-07, |
| "loss": 0.3607, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.7597362728164755, |
| "grad_norm": 47.160240173339844, |
| "learning_rate": 8.049377678024892e-07, |
| "loss": 0.3612, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.7620315485651658, |
| "grad_norm": 13.127697944641113, |
| "learning_rate": 7.972862681085494e-07, |
| "loss": 0.3573, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.764326824313856, |
| "grad_norm": 14.993858337402344, |
| "learning_rate": 7.896347684146093e-07, |
| "loss": 0.3652, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.7666221000625463, |
| "grad_norm": 10.984702110290527, |
| "learning_rate": 7.819832687206694e-07, |
| "loss": 0.3756, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.7689173758112365, |
| "grad_norm": 14.359518051147461, |
| "learning_rate": 7.743317690267293e-07, |
| "loss": 0.3458, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.7712126515599268, |
| "grad_norm": 11.842260360717773, |
| "learning_rate": 7.666802693327893e-07, |
| "loss": 0.3471, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.7735079273086171, |
| "grad_norm": 19.50934410095215, |
| "learning_rate": 7.590287696388492e-07, |
| "loss": 0.3696, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.7758032030573073, |
| "grad_norm": 9.951668739318848, |
| "learning_rate": 7.513772699449092e-07, |
| "loss": 0.3619, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.7780984788059976, |
| "grad_norm": 9.378868103027344, |
| "learning_rate": 7.437257702509693e-07, |
| "loss": 0.367, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.7803937545546878, |
| "grad_norm": 10.392294883728027, |
| "learning_rate": 7.360742705570292e-07, |
| "loss": 0.3729, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.7826890303033781, |
| "grad_norm": 10.031806945800781, |
| "learning_rate": 7.284227708630892e-07, |
| "loss": 0.3491, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.7849843060520684, |
| "grad_norm": 9.783834457397461, |
| "learning_rate": 7.209243011630279e-07, |
| "loss": 0.364, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.7872795818007586, |
| "grad_norm": 13.27206802368164, |
| "learning_rate": 7.13272801469088e-07, |
| "loss": 0.3544, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.7895748575494489, |
| "grad_norm": 84.03044891357422, |
| "learning_rate": 7.05621301775148e-07, |
| "loss": 0.3701, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.7918701332981392, |
| "grad_norm": 9.07703971862793, |
| "learning_rate": 6.97969802081208e-07, |
| "loss": 0.3366, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.7941654090468294, |
| "grad_norm": 9.118254661560059, |
| "learning_rate": 6.904713323811467e-07, |
| "loss": 0.3728, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.7964606847955196, |
| "grad_norm": 20.656383514404297, |
| "learning_rate": 6.828198326872067e-07, |
| "loss": 0.3576, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.7987559605442098, |
| "grad_norm": 12.286031723022461, |
| "learning_rate": 6.751683329932666e-07, |
| "loss": 0.3612, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.8010512362929001, |
| "grad_norm": 9.951446533203125, |
| "learning_rate": 6.675168332993268e-07, |
| "loss": 0.3527, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.8033465120415904, |
| "grad_norm": 8.655536651611328, |
| "learning_rate": 6.598653336053867e-07, |
| "loss": 0.357, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.8056417877902806, |
| "grad_norm": 11.027405738830566, |
| "learning_rate": 6.522138339114467e-07, |
| "loss": 0.3494, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.8079370635389709, |
| "grad_norm": 10.6943359375, |
| "learning_rate": 6.445623342175066e-07, |
| "loss": 0.3659, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.8102323392876611, |
| "grad_norm": 12.674545288085938, |
| "learning_rate": 6.369108345235666e-07, |
| "loss": 0.3545, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.8125276150363514, |
| "grad_norm": 12.870709419250488, |
| "learning_rate": 6.292593348296266e-07, |
| "loss": 0.3521, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.8148228907850417, |
| "grad_norm": 11.121269226074219, |
| "learning_rate": 6.216078351356866e-07, |
| "loss": 0.3786, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.8171181665337319, |
| "grad_norm": 13.682574272155762, |
| "learning_rate": 6.139563354417466e-07, |
| "loss": 0.3633, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.8194134422824222, |
| "grad_norm": 11.04853630065918, |
| "learning_rate": 6.063048357478066e-07, |
| "loss": 0.3745, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.8217087180311125, |
| "grad_norm": 9.358290672302246, |
| "learning_rate": 5.986533360538666e-07, |
| "loss": 0.369, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.8240039937798027, |
| "grad_norm": 7.955355644226074, |
| "learning_rate": 5.910018363599266e-07, |
| "loss": 0.3639, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.826299269528493, |
| "grad_norm": 25.4930362701416, |
| "learning_rate": 5.833503366659866e-07, |
| "loss": 0.3396, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8285945452771832, |
| "grad_norm": 8.573712348937988, |
| "learning_rate": 5.756988369720465e-07, |
| "loss": 0.3453, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.8308898210258735, |
| "grad_norm": 9.966428756713867, |
| "learning_rate": 5.680473372781065e-07, |
| "loss": 0.3578, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.8331850967745638, |
| "grad_norm": 10.190299034118652, |
| "learning_rate": 5.603958375841664e-07, |
| "loss": 0.3727, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.835480372523254, |
| "grad_norm": 16.960046768188477, |
| "learning_rate": 5.527443378902265e-07, |
| "loss": 0.3585, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.8377756482719443, |
| "grad_norm": 15.969688415527344, |
| "learning_rate": 5.450928381962865e-07, |
| "loss": 0.3795, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.8400709240206345, |
| "grad_norm": 10.289957046508789, |
| "learning_rate": 5.374413385023465e-07, |
| "loss": 0.3623, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.8423661997693248, |
| "grad_norm": 15.047087669372559, |
| "learning_rate": 5.297898388084065e-07, |
| "loss": 0.371, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.8446614755180151, |
| "grad_norm": 18.332683563232422, |
| "learning_rate": 5.221383391144664e-07, |
| "loss": 0.362, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.8469567512667053, |
| "grad_norm": 9.587491989135742, |
| "learning_rate": 5.144868394205265e-07, |
| "loss": 0.3656, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.8492520270153956, |
| "grad_norm": 15.642866134643555, |
| "learning_rate": 5.068353397265864e-07, |
| "loss": 0.3469, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.8515473027640859, |
| "grad_norm": 8.69747543334961, |
| "learning_rate": 4.991838400326464e-07, |
| "loss": 0.3594, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.8538425785127761, |
| "grad_norm": 11.755226135253906, |
| "learning_rate": 4.915323403387063e-07, |
| "loss": 0.3597, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.8561378542614664, |
| "grad_norm": 13.479146957397461, |
| "learning_rate": 4.838808406447664e-07, |
| "loss": 0.3545, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.8584331300101566, |
| "grad_norm": 12.416823387145996, |
| "learning_rate": 4.762293409508264e-07, |
| "loss": 0.3668, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.8607284057588469, |
| "grad_norm": 9.566291809082031, |
| "learning_rate": 4.687308712507652e-07, |
| "loss": 0.3612, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.8630236815075372, |
| "grad_norm": 15.225481986999512, |
| "learning_rate": 4.6107937155682516e-07, |
| "loss": 0.3604, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.8653189572562273, |
| "grad_norm": 42.362083435058594, |
| "learning_rate": 4.5342787186288513e-07, |
| "loss": 0.3607, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.8676142330049176, |
| "grad_norm": 29.728242874145508, |
| "learning_rate": 4.457763721689451e-07, |
| "loss": 0.3431, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.8699095087536078, |
| "grad_norm": 11.724947929382324, |
| "learning_rate": 4.3812487247500506e-07, |
| "loss": 0.3595, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.8722047845022981, |
| "grad_norm": 13.001766204833984, |
| "learning_rate": 4.3047337278106514e-07, |
| "loss": 0.3551, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.8745000602509884, |
| "grad_norm": 14.711641311645508, |
| "learning_rate": 4.228218730871251e-07, |
| "loss": 0.3598, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.8767953359996786, |
| "grad_norm": 23.77117347717285, |
| "learning_rate": 4.151703733931851e-07, |
| "loss": 0.3667, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.8790906117483689, |
| "grad_norm": 12.186518669128418, |
| "learning_rate": 4.0751887369924505e-07, |
| "loss": 0.3578, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.8813858874970592, |
| "grad_norm": 9.117465019226074, |
| "learning_rate": 3.99867374005305e-07, |
| "loss": 0.3634, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8836811632457494, |
| "grad_norm": 7.487546443939209, |
| "learning_rate": 3.9221587431136504e-07, |
| "loss": 0.3485, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.8859764389944397, |
| "grad_norm": 9.04023265838623, |
| "learning_rate": 3.84564374617425e-07, |
| "loss": 0.3543, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.8882717147431299, |
| "grad_norm": 17.847694396972656, |
| "learning_rate": 3.76912874923485e-07, |
| "loss": 0.3514, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.8905669904918202, |
| "grad_norm": 27.124319076538086, |
| "learning_rate": 3.69261375229545e-07, |
| "loss": 0.3658, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.8928622662405105, |
| "grad_norm": 22.0621280670166, |
| "learning_rate": 3.6160987553560497e-07, |
| "loss": 0.351, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.8951575419892007, |
| "grad_norm": 11.973557472229004, |
| "learning_rate": 3.53958375841665e-07, |
| "loss": 0.3485, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.897452817737891, |
| "grad_norm": 15.552183151245117, |
| "learning_rate": 3.4630687614772496e-07, |
| "loss": 0.3482, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.8997480934865812, |
| "grad_norm": 37.236507415771484, |
| "learning_rate": 3.3865537645378493e-07, |
| "loss": 0.3493, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.9020433692352715, |
| "grad_norm": 12.226764678955078, |
| "learning_rate": 3.3115690675372375e-07, |
| "loss": 0.3582, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.9043386449839618, |
| "grad_norm": 9.519804000854492, |
| "learning_rate": 3.235054070597837e-07, |
| "loss": 0.3603, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.906633920732652, |
| "grad_norm": 11.330206871032715, |
| "learning_rate": 3.158539073658437e-07, |
| "loss": 0.3564, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.9089291964813423, |
| "grad_norm": 9.512042999267578, |
| "learning_rate": 3.082024076719037e-07, |
| "loss": 0.3578, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.9112244722300326, |
| "grad_norm": 27.564258575439453, |
| "learning_rate": 3.005509079779637e-07, |
| "loss": 0.348, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.9135197479787228, |
| "grad_norm": 9.83193302154541, |
| "learning_rate": 2.9289940828402365e-07, |
| "loss": 0.3635, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.9158150237274131, |
| "grad_norm": 21.163455963134766, |
| "learning_rate": 2.852479085900837e-07, |
| "loss": 0.3457, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.9181102994761033, |
| "grad_norm": 7.9303975105285645, |
| "learning_rate": 2.7759640889614364e-07, |
| "loss": 0.351, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 21783, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
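
A minimal sketch of how this trainer state can be inspected offline, assuming it has been saved under the usual name `trainer_state.json` (the filename and the summary printout are assumptions, not part of the checkpoint itself); it only reads keys that appear above (`log_history`, `loss`, `step`, `epoch`, `num_train_epochs`, `max_steps`).

```python
# Sketch: summarize the trainer state dumped above using only the standard library.
import json

with open("trainer_state.json") as fh:          # path is an assumption
    state = json.load(fh)

# log_history is a list of per-logging-step dicts; keep only entries with a loss.
history = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]

print(f"logged points : {len(history)}")
print(f"first loss    : {losses[0]:.4f} at step {steps[0]}")
print(f"latest loss   : {losses[-1]:.4f} at step {steps[-1]}")
print(f"epoch reached : {state['epoch']:.4f} of {state['num_train_epochs']}")
print(f"max steps     : {state['max_steps']}")
```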