| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.4063565778971047, | |
| "eval_steps": 500, | |
| "global_step": 70000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005805093969958639, | |
| "grad_norm": 0.7240252494812012, | |
| "learning_rate": 5.746459252379847e-07, | |
| "loss": 0.1079, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0011610187939917278, | |
| "grad_norm": 5.649069786071777, | |
| "learning_rate": 1.1550963547713026e-06, | |
| "loss": 0.1034, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0017415281909875916, | |
| "grad_norm": 2.677654504776001, | |
| "learning_rate": 1.7355467843046206e-06, | |
| "loss": 0.0933, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0023220375879834556, | |
| "grad_norm": 1.4429970979690552, | |
| "learning_rate": 2.3159972138379382e-06, | |
| "loss": 0.0998, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0029025469849793192, | |
| "grad_norm": 1.217147707939148, | |
| "learning_rate": 2.8964476433712563e-06, | |
| "loss": 0.0968, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0034830563819751833, | |
| "grad_norm": 0.10677797347307205, | |
| "learning_rate": 3.476898072904574e-06, | |
| "loss": 0.0903, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.004063565778971047, | |
| "grad_norm": 18.24258804321289, | |
| "learning_rate": 4.0573485024378915e-06, | |
| "loss": 0.0898, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.004644075175966911, | |
| "grad_norm": 0.49379172921180725, | |
| "learning_rate": 4.63779893197121e-06, | |
| "loss": 0.0741, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.0052245845729627744, | |
| "grad_norm": 0.35244712233543396, | |
| "learning_rate": 5.218249361504528e-06, | |
| "loss": 0.0691, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0058050939699586385, | |
| "grad_norm": 1.6648738384246826, | |
| "learning_rate": 5.798699791037845e-06, | |
| "loss": 0.0652, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.0063856033669545025, | |
| "grad_norm": 1.3582569360733032, | |
| "learning_rate": 6.379150220571163e-06, | |
| "loss": 0.0632, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.0069661127639503665, | |
| "grad_norm": 8.560323715209961, | |
| "learning_rate": 6.9596006501044805e-06, | |
| "loss": 0.0573, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.0075466221609462305, | |
| "grad_norm": 1.7866237163543701, | |
| "learning_rate": 7.5400510796378e-06, | |
| "loss": 0.0517, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.008127131557942095, | |
| "grad_norm": 2.1602494716644287, | |
| "learning_rate": 8.120501509171117e-06, | |
| "loss": 0.0531, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.008707640954937958, | |
| "grad_norm": 3.2753777503967285, | |
| "learning_rate": 8.700951938704436e-06, | |
| "loss": 0.0446, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.009288150351933823, | |
| "grad_norm": 7.379251003265381, | |
| "learning_rate": 9.281402368237753e-06, | |
| "loss": 0.0415, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.009868659748929686, | |
| "grad_norm": 2.0278890132904053, | |
| "learning_rate": 9.861852797771071e-06, | |
| "loss": 0.0471, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.010449169145925549, | |
| "grad_norm": 1.9143238067626953, | |
| "learning_rate": 1.0442303227304388e-05, | |
| "loss": 0.0431, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.011029678542921414, | |
| "grad_norm": 3.3644015789031982, | |
| "learning_rate": 1.1022753656837706e-05, | |
| "loss": 0.0388, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.011610187939917277, | |
| "grad_norm": 5.762856960296631, | |
| "learning_rate": 1.1603204086371025e-05, | |
| "loss": 0.0387, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.012190697336913142, | |
| "grad_norm": 3.5617690086364746, | |
| "learning_rate": 1.2183654515904343e-05, | |
| "loss": 0.0437, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.012771206733909005, | |
| "grad_norm": 4.118985176086426, | |
| "learning_rate": 1.276410494543766e-05, | |
| "loss": 0.0368, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.01335171613090487, | |
| "grad_norm": 0.7795373201370239, | |
| "learning_rate": 1.3344555374970977e-05, | |
| "loss": 0.0441, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.013932225527900733, | |
| "grad_norm": 6.839756011962891, | |
| "learning_rate": 1.3925005804504295e-05, | |
| "loss": 0.0362, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.014512734924896596, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.4505456234037616e-05, | |
| "loss": 0.0355, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.015093244321892461, | |
| "grad_norm": 0.7248427271842957, | |
| "learning_rate": 1.508590666357093e-05, | |
| "loss": 0.0352, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.015673753718888326, | |
| "grad_norm": 1.2530128955841064, | |
| "learning_rate": 1.566635709310425e-05, | |
| "loss": 0.0337, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.01625426311588419, | |
| "grad_norm": 1.8551280498504639, | |
| "learning_rate": 1.6246807522637568e-05, | |
| "loss": 0.0392, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.016834772512880052, | |
| "grad_norm": 1.7844129800796509, | |
| "learning_rate": 1.6827257952170884e-05, | |
| "loss": 0.0327, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.017415281909875915, | |
| "grad_norm": 1.0492717027664185, | |
| "learning_rate": 1.74077083817042e-05, | |
| "loss": 0.0326, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.01799579130687178, | |
| "grad_norm": 1.5272563695907593, | |
| "learning_rate": 1.798815881123752e-05, | |
| "loss": 0.0363, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.018576300703867645, | |
| "grad_norm": 1.028568983078003, | |
| "learning_rate": 1.856860924077084e-05, | |
| "loss": 0.0326, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.01915681010086351, | |
| "grad_norm": 3.0390264987945557, | |
| "learning_rate": 1.9149059670304155e-05, | |
| "loss": 0.0327, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.01973731949785937, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9729510099837475e-05, | |
| "loss": 0.034, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.020317828894855235, | |
| "grad_norm": 3.5412075519561768, | |
| "learning_rate": 2.0309960529370792e-05, | |
| "loss": 0.0301, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.020898338291851098, | |
| "grad_norm": 63.408573150634766, | |
| "learning_rate": 2.0890410958904112e-05, | |
| "loss": 0.0391, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.021478847688846964, | |
| "grad_norm": 2.8030202388763428, | |
| "learning_rate": 2.147086138843743e-05, | |
| "loss": 0.0269, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.022059357085842828, | |
| "grad_norm": 9.339249610900879, | |
| "learning_rate": 2.2051311817970746e-05, | |
| "loss": 0.0322, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.02263986648283869, | |
| "grad_norm": 1.6153712272644043, | |
| "learning_rate": 2.2631762247504066e-05, | |
| "loss": 0.0347, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.023220375879834554, | |
| "grad_norm": 2.073003053665161, | |
| "learning_rate": 2.3212212677037383e-05, | |
| "loss": 0.0252, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.023800885276830417, | |
| "grad_norm": 1.535114049911499, | |
| "learning_rate": 2.37926631065707e-05, | |
| "loss": 0.0319, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.024381394673826284, | |
| "grad_norm": 2.764871120452881, | |
| "learning_rate": 2.4373113536104016e-05, | |
| "loss": 0.0316, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.024961904070822147, | |
| "grad_norm": 1.169055700302124, | |
| "learning_rate": 2.4953563965637336e-05, | |
| "loss": 0.0289, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.02554241346781801, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.5534014395170653e-05, | |
| "loss": 0.0301, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.026122922864813873, | |
| "grad_norm": 5.054708957672119, | |
| "learning_rate": 2.611446482470397e-05, | |
| "loss": 0.0276, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.02670343226180974, | |
| "grad_norm": 3.1561946868896484, | |
| "learning_rate": 2.669491525423729e-05, | |
| "loss": 0.0284, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.027283941658805603, | |
| "grad_norm": 1.3254655599594116, | |
| "learning_rate": 2.727536568377061e-05, | |
| "loss": 0.029, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.027864451055801466, | |
| "grad_norm": 15.622490882873535, | |
| "learning_rate": 2.7855816113303924e-05, | |
| "loss": 0.0319, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.02844496045279733, | |
| "grad_norm": 11.673898696899414, | |
| "learning_rate": 2.8436266542837244e-05, | |
| "loss": 0.0311, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.029025469849793192, | |
| "grad_norm": 3.8026087284088135, | |
| "learning_rate": 2.901671697237056e-05, | |
| "loss": 0.0273, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.02960597924678906, | |
| "grad_norm": 0.24046263098716736, | |
| "learning_rate": 2.959716740190388e-05, | |
| "loss": 0.0283, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.030186488643784922, | |
| "grad_norm": 3.2492616176605225, | |
| "learning_rate": 3.01776178314372e-05, | |
| "loss": 0.0288, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.030766998040780785, | |
| "grad_norm": 5.840951919555664, | |
| "learning_rate": 3.075806826097051e-05, | |
| "loss": 0.0267, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.03134750743777665, | |
| "grad_norm": 4.141648769378662, | |
| "learning_rate": 3.1338518690503834e-05, | |
| "loss": 0.031, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.03192801683477251, | |
| "grad_norm": 0.6267948746681213, | |
| "learning_rate": 3.191896912003715e-05, | |
| "loss": 0.027, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.03250852623176838, | |
| "grad_norm": 4.212204933166504, | |
| "learning_rate": 3.249941954957047e-05, | |
| "loss": 0.0312, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.03308903562876424, | |
| "grad_norm": 9.002190589904785, | |
| "learning_rate": 3.3079869979103785e-05, | |
| "loss": 0.0328, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.033669545025760104, | |
| "grad_norm": 5.377740383148193, | |
| "learning_rate": 3.36603204086371e-05, | |
| "loss": 0.0289, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.03425005442275597, | |
| "grad_norm": 4.514215469360352, | |
| "learning_rate": 3.4240770838170425e-05, | |
| "loss": 0.0288, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.03483056381975183, | |
| "grad_norm": 11.520332336425781, | |
| "learning_rate": 3.482122126770374e-05, | |
| "loss": 0.0315, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.0354110732167477, | |
| "grad_norm": 2.2946815490722656, | |
| "learning_rate": 3.540167169723706e-05, | |
| "loss": 0.0289, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.03599158261374356, | |
| "grad_norm": 2.24802565574646, | |
| "learning_rate": 3.5982122126770375e-05, | |
| "loss": 0.0233, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.036572092010739424, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.656257255630369e-05, | |
| "loss": 0.0258, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.03715260140773529, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.714302298583701e-05, | |
| "loss": 0.0279, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.03773311080473115, | |
| "grad_norm": 0.2940079867839813, | |
| "learning_rate": 3.772347341537033e-05, | |
| "loss": 0.0239, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.03831362020172702, | |
| "grad_norm": 1.4886199235916138, | |
| "learning_rate": 3.830392384490365e-05, | |
| "loss": 0.0284, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.038894129598722876, | |
| "grad_norm": 2.378464698791504, | |
| "learning_rate": 3.8884374274436966e-05, | |
| "loss": 0.0292, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.03947463899571874, | |
| "grad_norm": 1.4737799167633057, | |
| "learning_rate": 3.946482470397028e-05, | |
| "loss": 0.0252, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.04005514839271461, | |
| "grad_norm": 1.7435976266860962, | |
| "learning_rate": 4.00452751335036e-05, | |
| "loss": 0.0252, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.04063565778971047, | |
| "grad_norm": 3.012014627456665, | |
| "learning_rate": 4.062572556303692e-05, | |
| "loss": 0.0341, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.041216167186706336, | |
| "grad_norm": 1.463402509689331, | |
| "learning_rate": 4.120617599257023e-05, | |
| "loss": 0.0242, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.041796676583702196, | |
| "grad_norm": 2.936508893966675, | |
| "learning_rate": 4.178662642210355e-05, | |
| "loss": 0.0303, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.04237718598069806, | |
| "grad_norm": 2.3473055362701416, | |
| "learning_rate": 4.2367076851636874e-05, | |
| "loss": 0.0268, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.04295769537769393, | |
| "grad_norm": 3.3722922801971436, | |
| "learning_rate": 4.294752728117019e-05, | |
| "loss": 0.0212, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.04353820477468979, | |
| "grad_norm": 3.0405075550079346, | |
| "learning_rate": 4.352797771070351e-05, | |
| "loss": 0.0277, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.044118714171685655, | |
| "grad_norm": 1.044071912765503, | |
| "learning_rate": 4.4108428140236824e-05, | |
| "loss": 0.0225, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.044699223568681515, | |
| "grad_norm": 1.0134261846542358, | |
| "learning_rate": 4.468887856977014e-05, | |
| "loss": 0.0253, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.04527973296567738, | |
| "grad_norm": 9.729911804199219, | |
| "learning_rate": 4.5269328999303464e-05, | |
| "loss": 0.0323, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.04586024236267325, | |
| "grad_norm": 2.1204724311828613, | |
| "learning_rate": 4.584977942883678e-05, | |
| "loss": 0.0279, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.04644075175966911, | |
| "grad_norm": 9.481526374816895, | |
| "learning_rate": 4.64302298583701e-05, | |
| "loss": 0.031, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.047021261156664974, | |
| "grad_norm": 0.6424680948257446, | |
| "learning_rate": 4.7010680287903415e-05, | |
| "loss": 0.0278, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.047601770553660834, | |
| "grad_norm": 2.0485119819641113, | |
| "learning_rate": 4.759113071743673e-05, | |
| "loss": 0.0261, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.0481822799506567, | |
| "grad_norm": 2.485046148300171, | |
| "learning_rate": 4.8171581146970055e-05, | |
| "loss": 0.0257, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.04876278934765257, | |
| "grad_norm": 1.421764612197876, | |
| "learning_rate": 4.875203157650337e-05, | |
| "loss": 0.0312, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.04934329874464843, | |
| "grad_norm": 1.3517789840698242, | |
| "learning_rate": 4.933248200603668e-05, | |
| "loss": 0.0325, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.049923808141644294, | |
| "grad_norm": 0.42899224162101746, | |
| "learning_rate": 4.9912932435570005e-05, | |
| "loss": 0.0225, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.05050431753864015, | |
| "grad_norm": 1.7937917709350586, | |
| "learning_rate": 4.9999966717127464e-05, | |
| "loss": 0.0289, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.05108482693563602, | |
| "grad_norm": 0.9099732041358948, | |
| "learning_rate": 4.9999842338357364e-05, | |
| "loss": 0.0277, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.051665336332631887, | |
| "grad_norm": 1.6560391187667847, | |
| "learning_rate": 4.999962582765702e-05, | |
| "loss": 0.0218, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.052245845729627746, | |
| "grad_norm": 1.163989543914795, | |
| "learning_rate": 4.999931718582432e-05, | |
| "loss": 0.0225, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.05282635512662361, | |
| "grad_norm": 7.1312971115112305, | |
| "learning_rate": 4.9998916413996715e-05, | |
| "loss": 0.0301, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.05340686452361948, | |
| "grad_norm": 10.158438682556152, | |
| "learning_rate": 4.999842351365117e-05, | |
| "loss": 0.0241, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.05398737392061534, | |
| "grad_norm": 0.19601650536060333, | |
| "learning_rate": 4.999783848660417e-05, | |
| "loss": 0.026, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.054567883317611206, | |
| "grad_norm": 0.716726541519165, | |
| "learning_rate": 4.999716133501171e-05, | |
| "loss": 0.0247, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.055148392714607065, | |
| "grad_norm": 2.3324790000915527, | |
| "learning_rate": 4.99963920613693e-05, | |
| "loss": 0.0237, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.05572890211160293, | |
| "grad_norm": 5.88576078414917, | |
| "learning_rate": 4.9995530668511946e-05, | |
| "loss": 0.0213, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.0563094115085988, | |
| "grad_norm": 0.17054684460163116, | |
| "learning_rate": 4.9994577159614144e-05, | |
| "loss": 0.0225, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.05688992090559466, | |
| "grad_norm": 1.9155703783035278, | |
| "learning_rate": 4.9993531538189854e-05, | |
| "loss": 0.0292, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.057470430302590525, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.99923938080925e-05, | |
| "loss": 0.025, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.058050939699586385, | |
| "grad_norm": 3.8306055068969727, | |
| "learning_rate": 4.999116397351497e-05, | |
| "loss": 0.0257, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.05863144909658225, | |
| "grad_norm": 2.8682026863098145, | |
| "learning_rate": 4.998984203898957e-05, | |
| "loss": 0.0276, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.05921195849357812, | |
| "grad_norm": 1.6766282320022583, | |
| "learning_rate": 4.9988428009388026e-05, | |
| "loss": 0.0261, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.05979246789057398, | |
| "grad_norm": 1.1323601007461548, | |
| "learning_rate": 4.998692188992147e-05, | |
| "loss": 0.0219, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.060372977287569844, | |
| "grad_norm": 1.3007240295410156, | |
| "learning_rate": 4.998532368614038e-05, | |
| "loss": 0.0321, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.060953486684565704, | |
| "grad_norm": 0.4534103572368622, | |
| "learning_rate": 4.998363340393465e-05, | |
| "loss": 0.0271, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.06153399608156157, | |
| "grad_norm": 1.0366442203521729, | |
| "learning_rate": 4.9981851049533446e-05, | |
| "loss": 0.0212, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.06211450547855744, | |
| "grad_norm": 0.7673536539077759, | |
| "learning_rate": 4.9979976629505305e-05, | |
| "loss": 0.0214, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.0626950148755533, | |
| "grad_norm": 6.375418186187744, | |
| "learning_rate": 4.9978010150758016e-05, | |
| "loss": 0.0241, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.06327552427254916, | |
| "grad_norm": 2.6672778129577637, | |
| "learning_rate": 4.9975951620538644e-05, | |
| "loss": 0.023, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.06385603366954502, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9973801046433494e-05, | |
| "loss": 0.0238, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.06443654306654088, | |
| "grad_norm": 0.47239989042282104, | |
| "learning_rate": 4.997155843636808e-05, | |
| "loss": 0.023, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.06501705246353676, | |
| "grad_norm": 2.6296310424804688, | |
| "learning_rate": 4.996922379860708e-05, | |
| "loss": 0.0247, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.06559756186053262, | |
| "grad_norm": 1.4962682723999023, | |
| "learning_rate": 4.996679714175436e-05, | |
| "loss": 0.0217, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.06617807125752848, | |
| "grad_norm": 1.51057767868042, | |
| "learning_rate": 4.996427847475286e-05, | |
| "loss": 0.0249, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.06675858065452435, | |
| "grad_norm": 0.9044837951660156, | |
| "learning_rate": 4.9961667806884625e-05, | |
| "loss": 0.0274, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.06733909005152021, | |
| "grad_norm": 1.1930240392684937, | |
| "learning_rate": 4.9958965147770764e-05, | |
| "loss": 0.0249, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.06791959944851607, | |
| "grad_norm": 0.13868218660354614, | |
| "learning_rate": 4.995617050737138e-05, | |
| "loss": 0.0285, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.06850010884551194, | |
| "grad_norm": 1.4061717987060547, | |
| "learning_rate": 4.995328389598556e-05, | |
| "loss": 0.0235, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.0690806182425078, | |
| "grad_norm": 3.641000509262085, | |
| "learning_rate": 4.995030532425134e-05, | |
| "loss": 0.0179, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.06966112763950366, | |
| "grad_norm": 0.7060804963111877, | |
| "learning_rate": 4.994723480314565e-05, | |
| "loss": 0.0205, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.07024163703649954, | |
| "grad_norm": 0.5786570310592651, | |
| "learning_rate": 4.994407234398427e-05, | |
| "loss": 0.0236, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.0708221464334954, | |
| "grad_norm": 1.2352372407913208, | |
| "learning_rate": 4.994081795842183e-05, | |
| "loss": 0.0217, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.07140265583049125, | |
| "grad_norm": 4.269515037536621, | |
| "learning_rate": 4.9937471658451715e-05, | |
| "loss": 0.0207, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.07198316522748711, | |
| "grad_norm": 0.17409491539001465, | |
| "learning_rate": 4.9934033456406035e-05, | |
| "loss": 0.0186, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.07256367462448299, | |
| "grad_norm": 1.2208822965621948, | |
| "learning_rate": 4.993050336495562e-05, | |
| "loss": 0.0261, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.07314418402147885, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9926881397109896e-05, | |
| "loss": 0.0285, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.07372469341847471, | |
| "grad_norm": 7.06033992767334, | |
| "learning_rate": 4.99231675662169e-05, | |
| "loss": 0.0202, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.07430520281547058, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9919361885963234e-05, | |
| "loss": 0.0276, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.07488571221246644, | |
| "grad_norm": 0.406585693359375, | |
| "learning_rate": 4.991546437037396e-05, | |
| "loss": 0.02, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.0754662216094623, | |
| "grad_norm": 0.26512107253074646, | |
| "learning_rate": 4.9911475033812596e-05, | |
| "loss": 0.0234, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.07604673100645817, | |
| "grad_norm": 1.349731206893921, | |
| "learning_rate": 4.990739389098105e-05, | |
| "loss": 0.027, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.07662724040345403, | |
| "grad_norm": 4.345367908477783, | |
| "learning_rate": 4.990322095691956e-05, | |
| "loss": 0.02, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.07720774980044989, | |
| "grad_norm": 0.672780454158783, | |
| "learning_rate": 4.9898956247006636e-05, | |
| "loss": 0.0199, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.07778825919744575, | |
| "grad_norm": 0.8305972218513489, | |
| "learning_rate": 4.9894599776959015e-05, | |
| "loss": 0.0217, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.07836876859444163, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9890151562831606e-05, | |
| "loss": 0.0245, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.07894927799143749, | |
| "grad_norm": 0.9174224734306335, | |
| "learning_rate": 4.9885611621017403e-05, | |
| "loss": 0.0184, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.07952978738843335, | |
| "grad_norm": 5.607458114624023, | |
| "learning_rate": 4.988097996824746e-05, | |
| "loss": 0.0196, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.08011029678542922, | |
| "grad_norm": 0.9925137162208557, | |
| "learning_rate": 4.987625662159083e-05, | |
| "loss": 0.021, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.08069080618242508, | |
| "grad_norm": 5.234767436981201, | |
| "learning_rate": 4.987144159845443e-05, | |
| "loss": 0.0226, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.08127131557942094, | |
| "grad_norm": 0.462223082780838, | |
| "learning_rate": 4.986653491658309e-05, | |
| "loss": 0.0201, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.08185182497641681, | |
| "grad_norm": 0.5672245025634766, | |
| "learning_rate": 4.986153659405939e-05, | |
| "loss": 0.0173, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.08243233437341267, | |
| "grad_norm": 7.8867011070251465, | |
| "learning_rate": 4.985644664930367e-05, | |
| "loss": 0.0173, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.08301284377040853, | |
| "grad_norm": 0.7449125647544861, | |
| "learning_rate": 4.9851265101073886e-05, | |
| "loss": 0.024, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.08359335316740439, | |
| "grad_norm": 0.16529901325702667, | |
| "learning_rate": 4.984599196846562e-05, | |
| "loss": 0.0227, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.08417386256440026, | |
| "grad_norm": 11.49720287322998, | |
| "learning_rate": 4.9840627270911934e-05, | |
| "loss": 0.0232, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.08475437196139612, | |
| "grad_norm": 1.724212884902954, | |
| "learning_rate": 4.9835171028183355e-05, | |
| "loss": 0.0222, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.08533488135839198, | |
| "grad_norm": 0.8492684364318848, | |
| "learning_rate": 4.982962326038778e-05, | |
| "loss": 0.0202, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.08591539075538786, | |
| "grad_norm": 1.6152923107147217, | |
| "learning_rate": 4.9823983987970396e-05, | |
| "loss": 0.0195, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.08649590015238372, | |
| "grad_norm": 0.52412348985672, | |
| "learning_rate": 4.981825323171362e-05, | |
| "loss": 0.0206, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.08707640954937958, | |
| "grad_norm": 0.41687363386154175, | |
| "learning_rate": 4.9812431012737006e-05, | |
| "loss": 0.023, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.08765691894637545, | |
| "grad_norm": 1.0384039878845215, | |
| "learning_rate": 4.9806517352497184e-05, | |
| "loss": 0.0244, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.08823742834337131, | |
| "grad_norm": 5.219573974609375, | |
| "learning_rate": 4.980051227278777e-05, | |
| "loss": 0.0209, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.08881793774036717, | |
| "grad_norm": 2.8465518951416016, | |
| "learning_rate": 4.979441579573928e-05, | |
| "loss": 0.0253, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.08939844713736303, | |
| "grad_norm": 7.259213924407959, | |
| "learning_rate": 4.978822794381908e-05, | |
| "loss": 0.0246, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.0899789565343589, | |
| "grad_norm": 1.2073447704315186, | |
| "learning_rate": 4.978194873983124e-05, | |
| "loss": 0.0168, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.09055946593135476, | |
| "grad_norm": 3.7044851779937744, | |
| "learning_rate": 4.977557820691653e-05, | |
| "loss": 0.0188, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.09113997532835062, | |
| "grad_norm": 0.4414062798023224, | |
| "learning_rate": 4.976911636855227e-05, | |
| "loss": 0.0224, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.0917204847253465, | |
| "grad_norm": 2.013897657394409, | |
| "learning_rate": 4.976256324855227e-05, | |
| "loss": 0.0198, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.09230099412234236, | |
| "grad_norm": 0.45843204855918884, | |
| "learning_rate": 4.975591887106677e-05, | |
| "loss": 0.0176, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.09288150351933822, | |
| "grad_norm": 1.0656216144561768, | |
| "learning_rate": 4.9749183260582274e-05, | |
| "loss": 0.0249, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.09346201291633409, | |
| "grad_norm": 0.3733506500720978, | |
| "learning_rate": 4.9742356441921544e-05, | |
| "loss": 0.0203, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.09404252231332995, | |
| "grad_norm": 0.9329887628555298, | |
| "learning_rate": 4.973543844024345e-05, | |
| "loss": 0.0218, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.09462303171032581, | |
| "grad_norm": 4.0852251052856445, | |
| "learning_rate": 4.972842928104291e-05, | |
| "loss": 0.027, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.09520354110732167, | |
| "grad_norm": 0.3162221610546112, | |
| "learning_rate": 4.9721328990150776e-05, | |
| "loss": 0.0225, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.09578405050431754, | |
| "grad_norm": 0.2578160762786865, | |
| "learning_rate": 4.971413759373376e-05, | |
| "loss": 0.0176, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.0963645599013134, | |
| "grad_norm": 0.3880905210971832, | |
| "learning_rate": 4.970685511829432e-05, | |
| "loss": 0.0183, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.09694506929830926, | |
| "grad_norm": 1.3224152326583862, | |
| "learning_rate": 4.969948159067056e-05, | |
| "loss": 0.0202, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.09752557869530513, | |
| "grad_norm": 1.7293118238449097, | |
| "learning_rate": 4.969201703803614e-05, | |
| "loss": 0.0234, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.098106088092301, | |
| "grad_norm": 1.8660351037979126, | |
| "learning_rate": 4.9684461487900195e-05, | |
| "loss": 0.0207, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.09868659748929685, | |
| "grad_norm": 2.0726287364959717, | |
| "learning_rate": 4.967681496810719e-05, | |
| "loss": 0.0218, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.09926710688629273, | |
| "grad_norm": 6.986308574676514, | |
| "learning_rate": 4.966907750683684e-05, | |
| "loss": 0.0194, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.09984761628328859, | |
| "grad_norm": 0.9702723622322083, | |
| "learning_rate": 4.966124913260402e-05, | |
| "loss": 0.022, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.10042812568028445, | |
| "grad_norm": 0.1596653163433075, | |
| "learning_rate": 4.9653329874258647e-05, | |
| "loss": 0.0195, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.1010086350772803, | |
| "grad_norm": 4.516726970672607, | |
| "learning_rate": 4.964531976098556e-05, | |
| "loss": 0.0216, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.10158914447427618, | |
| "grad_norm": 3.8601644039154053, | |
| "learning_rate": 4.9637218822304446e-05, | |
| "loss": 0.0211, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.10216965387127204, | |
| "grad_norm": 0.3691064715385437, | |
| "learning_rate": 4.962902708806968e-05, | |
| "loss": 0.0237, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.1027501632682679, | |
| "grad_norm": 1.635680913925171, | |
| "learning_rate": 4.9620744588470256e-05, | |
| "loss": 0.0229, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.10333067266526377, | |
| "grad_norm": 0.3783847391605377, | |
| "learning_rate": 4.9612371354029706e-05, | |
| "loss": 0.0167, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.10391118206225963, | |
| "grad_norm": 1.172295093536377, | |
| "learning_rate": 4.96039074156059e-05, | |
| "loss": 0.0217, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.10449169145925549, | |
| "grad_norm": 8.094454765319824, | |
| "learning_rate": 4.959535280439098e-05, | |
| "loss": 0.019, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.10507220085625137, | |
| "grad_norm": 0.4028318524360657, | |
| "learning_rate": 4.958670755191127e-05, | |
| "loss": 0.0234, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.10565271025324723, | |
| "grad_norm": 0.5673860907554626, | |
| "learning_rate": 4.9577971690027136e-05, | |
| "loss": 0.0214, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.10623321965024309, | |
| "grad_norm": 0.0952591523528099, | |
| "learning_rate": 4.956914525093283e-05, | |
| "loss": 0.0195, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.10681372904723896, | |
| "grad_norm": 0.11975416541099548, | |
| "learning_rate": 4.9560228267156445e-05, | |
| "loss": 0.0214, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.10739423844423482, | |
| "grad_norm": 0.31427842378616333, | |
| "learning_rate": 4.955122077155974e-05, | |
| "loss": 0.0204, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.10797474784123068, | |
| "grad_norm": 0.2761117219924927, | |
| "learning_rate": 4.9542122797338054e-05, | |
| "loss": 0.018, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.10855525723822654, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.953293437802014e-05, | |
| "loss": 0.0203, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.10913576663522241, | |
| "grad_norm": 1.229196548461914, | |
| "learning_rate": 4.9523655547468095e-05, | |
| "loss": 0.0209, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.10971627603221827, | |
| "grad_norm": 1.3233908414840698, | |
| "learning_rate": 4.951428633987719e-05, | |
| "loss": 0.0192, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.11029678542921413, | |
| "grad_norm": 4.7784857749938965, | |
| "learning_rate": 4.950482678977577e-05, | |
| "loss": 0.021, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.11087729482621, | |
| "grad_norm": 0.059221718460321426, | |
| "learning_rate": 4.949527693202513e-05, | |
| "loss": 0.0232, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.11145780422320586, | |
| "grad_norm": 1.453174352645874, | |
| "learning_rate": 4.9485636801819356e-05, | |
| "loss": 0.0222, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.11203831362020172, | |
| "grad_norm": 0.19113394618034363, | |
| "learning_rate": 4.947590643468523e-05, | |
| "loss": 0.0213, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.1126188230171976, | |
| "grad_norm": 0.06702837347984314, | |
| "learning_rate": 4.946608586648206e-05, | |
| "loss": 0.0262, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.11319933241419346, | |
| "grad_norm": 0.7900282144546509, | |
| "learning_rate": 4.945617513340162e-05, | |
| "loss": 0.0179, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.11377984181118932, | |
| "grad_norm": 0.9422081112861633, | |
| "learning_rate": 4.944617427196792e-05, | |
| "loss": 0.0179, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.11436035120818518, | |
| "grad_norm": 6.721597194671631, | |
| "learning_rate": 4.9436083319037134e-05, | |
| "loss": 0.0228, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.11494086060518105, | |
| "grad_norm": 2.548957109451294, | |
| "learning_rate": 4.942590231179747e-05, | |
| "loss": 0.0208, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.11552137000217691, | |
| "grad_norm": 2.1897802352905273, | |
| "learning_rate": 4.9415631287768995e-05, | |
| "loss": 0.0293, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.11610187939917277, | |
| "grad_norm": 1.0778768062591553, | |
| "learning_rate": 4.9405270284803516e-05, | |
| "loss": 0.0205, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.11668238879616864, | |
| "grad_norm": 0.8228683471679688, | |
| "learning_rate": 4.939481934108444e-05, | |
| "loss": 0.0182, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.1172628981931645, | |
| "grad_norm": 0.5803897976875305, | |
| "learning_rate": 4.938427849512664e-05, | |
| "loss": 0.0253, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.11784340759016036, | |
| "grad_norm": 1.7605079412460327, | |
| "learning_rate": 4.93736477857763e-05, | |
| "loss": 0.022, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.11842391698715624, | |
| "grad_norm": 6.337480068206787, | |
| "learning_rate": 4.9362927252210764e-05, | |
| "loss": 0.0167, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.1190044263841521, | |
| "grad_norm": 1.6917483806610107, | |
| "learning_rate": 4.935211693393844e-05, | |
| "loss": 0.0197, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.11958493578114796, | |
| "grad_norm": 3.9351799488067627, | |
| "learning_rate": 4.934121687079859e-05, | |
| "loss": 0.024, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.12016544517814381, | |
| "grad_norm": 1.328538417816162, | |
| "learning_rate": 4.933022710296121e-05, | |
| "loss": 0.0215, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.12074595457513969, | |
| "grad_norm": 0.6960548758506775, | |
| "learning_rate": 4.931914767092692e-05, | |
| "loss": 0.0214, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.12132646397213555, | |
| "grad_norm": 3.212674140930176, | |
| "learning_rate": 4.930797861552674e-05, | |
| "loss": 0.0201, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.12190697336913141, | |
| "grad_norm": 0.33600953221321106, | |
| "learning_rate": 4.929671997792199e-05, | |
| "loss": 0.0188, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.12248748276612728, | |
| "grad_norm": 0.37020212411880493, | |
| "learning_rate": 4.928537179960415e-05, | |
| "loss": 0.0172, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.12306799216312314, | |
| "grad_norm": 1.471659541130066, | |
| "learning_rate": 4.927393412239465e-05, | |
| "loss": 0.022, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.123648501560119, | |
| "grad_norm": 0.34243813157081604, | |
| "learning_rate": 4.9262406988444773e-05, | |
| "loss": 0.0186, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.12422901095711487, | |
| "grad_norm": 1.0350617170333862, | |
| "learning_rate": 4.9250790440235487e-05, | |
| "loss": 0.0192, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.12480952035411073, | |
| "grad_norm": 0.2393186092376709, | |
| "learning_rate": 4.923908452057723e-05, | |
| "loss": 0.0202, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.1253900297511066, | |
| "grad_norm": 0.8566457629203796, | |
| "learning_rate": 4.9227289272609855e-05, | |
| "loss": 0.0225, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.12597053914810247, | |
| "grad_norm": 3.1956393718719482, | |
| "learning_rate": 4.92154047398024e-05, | |
| "loss": 0.0238, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.12655104854509833, | |
| "grad_norm": 2.6811676025390625, | |
| "learning_rate": 4.920343096595291e-05, | |
| "loss": 0.0225, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.1271315579420942, | |
| "grad_norm": 0.4638591706752777, | |
| "learning_rate": 4.9191367995188376e-05, | |
| "loss": 0.018, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.12771206733909005, | |
| "grad_norm": 1.0029135942459106, | |
| "learning_rate": 4.917921587196444e-05, | |
| "loss": 0.0282, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.1282925767360859, | |
| "grad_norm": 1.0247883796691895, | |
| "learning_rate": 4.916697464106535e-05, | |
| "loss": 0.0196, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.12887308613308177, | |
| "grad_norm": 1.2868847846984863, | |
| "learning_rate": 4.915464434760369e-05, | |
| "loss": 0.0239, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.12945359553007765, | |
| "grad_norm": 1.3176194429397583, | |
| "learning_rate": 4.914222503702033e-05, | |
| "loss": 0.0174, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.1300341049270735, | |
| "grad_norm": 0.4307650029659271, | |
| "learning_rate": 4.912971675508414e-05, | |
| "loss": 0.0205, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.13061461432406937, | |
| "grad_norm": 0.6782764196395874, | |
| "learning_rate": 4.911711954789191e-05, | |
| "loss": 0.0155, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.13119512372106523, | |
| "grad_norm": 4.9244866371154785, | |
| "learning_rate": 4.910443346186812e-05, | |
| "loss": 0.0216, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.1317756331180611, | |
| "grad_norm": 0.7279213666915894, | |
| "learning_rate": 4.9091658543764816e-05, | |
| "loss": 0.0192, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.13235614251505695, | |
| "grad_norm": 0.7150142192840576, | |
| "learning_rate": 4.9078794840661415e-05, | |
| "loss": 0.023, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.13293665191205284, | |
| "grad_norm": 0.3685489594936371, | |
| "learning_rate": 4.906584239996451e-05, | |
| "loss": 0.022, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.1335171613090487, | |
| "grad_norm": 2.9621119499206543, | |
| "learning_rate": 4.905280126940775e-05, | |
| "loss": 0.0172, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.13409767070604456, | |
| "grad_norm": 3.5731096267700195, | |
| "learning_rate": 4.9039671497051623e-05, | |
| "loss": 0.0197, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.13467818010304042, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.902645313128327e-05, | |
| "loss": 0.0168, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.13525868950003628, | |
| "grad_norm": 0.6063820123672485, | |
| "learning_rate": 4.901314622081635e-05, | |
| "loss": 0.0178, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.13583919889703214, | |
| "grad_norm": 1.5601248741149902, | |
| "learning_rate": 4.8999750814690825e-05, | |
| "loss": 0.0153, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.136419708294028, | |
| "grad_norm": 1.3011990785598755, | |
| "learning_rate": 4.89862669622728e-05, | |
| "loss": 0.0182, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.13700021769102388, | |
| "grad_norm": 2.8852713108062744, | |
| "learning_rate": 4.897269471325431e-05, | |
| "loss": 0.0187, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.13758072708801974, | |
| "grad_norm": 0.9321713447570801, | |
| "learning_rate": 4.895903411765317e-05, | |
| "loss": 0.0163, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.1381612364850156, | |
| "grad_norm": 1.3897167444229126, | |
| "learning_rate": 4.894528522581279e-05, | |
| "loss": 0.0255, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.13874174588201146, | |
| "grad_norm": 0.32952451705932617, | |
| "learning_rate": 4.893144808840196e-05, | |
| "loss": 0.0206, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.13932225527900732, | |
| "grad_norm": 0.41645577549934387, | |
| "learning_rate": 4.891752275641468e-05, | |
| "loss": 0.0187, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.13990276467600318, | |
| "grad_norm": 1.9071933031082153, | |
| "learning_rate": 4.890350928117e-05, | |
| "loss": 0.0189, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.14048327407299907, | |
| "grad_norm": 11.354212760925293, | |
| "learning_rate": 4.888940771431178e-05, | |
| "loss": 0.0193, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.14106378346999493, | |
| "grad_norm": 0.22728995978832245, | |
| "learning_rate": 4.887521810780853e-05, | |
| "loss": 0.0197, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.1416442928669908, | |
| "grad_norm": 0.6692954897880554, | |
| "learning_rate": 4.88609405139532e-05, | |
| "loss": 0.0211, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.14222480226398665, | |
| "grad_norm": 0.3103114664554596, | |
| "learning_rate": 4.884657498536304e-05, | |
| "loss": 0.0171, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.1428053116609825, | |
| "grad_norm": 0.7131453156471252, | |
| "learning_rate": 4.8832121574979314e-05, | |
| "loss": 0.0171, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.14338582105797837, | |
| "grad_norm": 0.8742627501487732, | |
| "learning_rate": 4.88175803360672e-05, | |
| "loss": 0.0171, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.14396633045497423, | |
| "grad_norm": 1.464080810546875, | |
| "learning_rate": 4.880295132221552e-05, | |
| "loss": 0.0217, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.14454683985197012, | |
| "grad_norm": 0.1914157271385193, | |
| "learning_rate": 4.87882345873366e-05, | |
| "loss": 0.0226, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.14512734924896598, | |
| "grad_norm": 0.7907546162605286, | |
| "learning_rate": 4.877343018566601e-05, | |
| "loss": 0.014, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.14570785864596184, | |
| "grad_norm": 0.7815316319465637, | |
| "learning_rate": 4.875853817176243e-05, | |
| "loss": 0.0208, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.1462883680429577, | |
| "grad_norm": 0.8793790340423584, | |
| "learning_rate": 4.87435586005074e-05, | |
| "loss": 0.0188, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.14686887743995355, | |
| "grad_norm": 0.35067594051361084, | |
| "learning_rate": 4.872849152710515e-05, | |
| "loss": 0.0247, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.14744938683694941, | |
| "grad_norm": 0.6180335283279419, | |
| "learning_rate": 4.871333700708236e-05, | |
| "loss": 0.0202, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.14802989623394527, | |
| "grad_norm": 1.0985257625579834, | |
| "learning_rate": 4.8698095096288e-05, | |
| "loss": 0.0197, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.14861040563094116, | |
| "grad_norm": 0.47718220949172974, | |
| "learning_rate": 4.8682765850893085e-05, | |
| "loss": 0.019, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.14919091502793702, | |
| "grad_norm": 3.357231616973877, | |
| "learning_rate": 4.866734932739049e-05, | |
| "loss": 0.021, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.14977142442493288, | |
| "grad_norm": 0.9318442940711975, | |
| "learning_rate": 4.865184558259474e-05, | |
| "loss": 0.0185, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.15035193382192874, | |
| "grad_norm": 0.1340111941099167, | |
| "learning_rate": 4.863625467364179e-05, | |
| "loss": 0.0164, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.1509324432189246, | |
| "grad_norm": 1.3237409591674805, | |
| "learning_rate": 4.862057665798883e-05, | |
| "loss": 0.0195, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.15151295261592046, | |
| "grad_norm": 0.23375193774700165, | |
| "learning_rate": 4.860481159341405e-05, | |
| "loss": 0.0169, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.15209346201291635, | |
| "grad_norm": 3.856459617614746, | |
| "learning_rate": 4.858895953801644e-05, | |
| "loss": 0.0181, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.1526739714099122, | |
| "grad_norm": 0.30206841230392456, | |
| "learning_rate": 4.8573020550215606e-05, | |
| "loss": 0.0203, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.15325448080690807, | |
| "grad_norm": 0.32139310240745544, | |
| "learning_rate": 4.855699468875151e-05, | |
| "loss": 0.0153, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.15383499020390393, | |
| "grad_norm": 0.3401035964488983, | |
| "learning_rate": 4.854088201268425e-05, | |
| "loss": 0.02, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.15441549960089979, | |
| "grad_norm": 1.1033488512039185, | |
| "learning_rate": 4.852468258139388e-05, | |
| "loss": 0.019, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.15499600899789565, | |
| "grad_norm": 0.26150408387184143, | |
| "learning_rate": 4.8508396454580174e-05, | |
| "loss": 0.0217, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.1555765183948915, | |
| "grad_norm": 1.0009557008743286, | |
| "learning_rate": 4.849202369226241e-05, | |
| "loss": 0.0178, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.1561570277918874, | |
| "grad_norm": 0.949277400970459, | |
| "learning_rate": 4.8475564354779135e-05, | |
| "loss": 0.021, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.15673753718888325, | |
| "grad_norm": 0.5669822692871094, | |
| "learning_rate": 4.845901850278794e-05, | |
| "loss": 0.0203, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.1573180465858791, | |
| "grad_norm": 0.8850467205047607, | |
| "learning_rate": 4.844238619726528e-05, | |
| "loss": 0.0161, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.15789855598287497, | |
| "grad_norm": 1.5471025705337524, | |
| "learning_rate": 4.842566749950618e-05, | |
| "loss": 0.0169, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.15847906537987083, | |
| "grad_norm": 0.35896119475364685, | |
| "learning_rate": 4.8408862471124075e-05, | |
| "loss": 0.0232, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.1590595747768667, | |
| "grad_norm": 1.772006630897522, | |
| "learning_rate": 4.839197117405053e-05, | |
| "loss": 0.0195, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.15964008417386255, | |
| "grad_norm": 0.5070587396621704, | |
| "learning_rate": 4.837499367053508e-05, | |
| "loss": 0.0159, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.16022059357085844, | |
| "grad_norm": 0.4093703329563141, | |
| "learning_rate": 4.835793002314489e-05, | |
| "loss": 0.018, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.1608011029678543, | |
| "grad_norm": 0.20872582495212555, | |
| "learning_rate": 4.8340780294764655e-05, | |
| "loss": 0.0206, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.16138161236485016, | |
| "grad_norm": 1.1325550079345703, | |
| "learning_rate": 4.8323544548596256e-05, | |
| "loss": 0.0179, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.16196212176184602, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.8306222848158615e-05, | |
| "loss": 0.0198, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.16254263115884188, | |
| "grad_norm": 0.10438452661037445, | |
| "learning_rate": 4.828881525728739e-05, | |
| "loss": 0.0246, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.16312314055583774, | |
| "grad_norm": 0.5154972076416016, | |
| "learning_rate": 4.827132184013479e-05, | |
| "loss": 0.0203, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.16370364995283362, | |
| "grad_norm": 0.43772637844085693, | |
| "learning_rate": 4.825374266116931e-05, | |
| "loss": 0.0152, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.16428415934982948, | |
| "grad_norm": 0.9545837640762329, | |
| "learning_rate": 4.82360777851755e-05, | |
| "loss": 0.0193, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.16486466874682534, | |
| "grad_norm": 0.4668686091899872, | |
| "learning_rate": 4.821832727725375e-05, | |
| "loss": 0.0163, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.1654451781438212, | |
| "grad_norm": 1.1754403114318848, | |
| "learning_rate": 4.8200491202819995e-05, | |
| "loss": 0.018, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.16602568754081706, | |
| "grad_norm": 0.6412404179573059, | |
| "learning_rate": 4.8182569627605556e-05, | |
| "loss": 0.015, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.16660619693781292, | |
| "grad_norm": 3.5581717491149902, | |
| "learning_rate": 4.81645626176568e-05, | |
| "loss": 0.0128, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.16718670633480878, | |
| "grad_norm": 0.8452811241149902, | |
| "learning_rate": 4.814647023933497e-05, | |
| "loss": 0.021, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.16776721573180467, | |
| "grad_norm": 0.7324305772781372, | |
| "learning_rate": 4.812829255931592e-05, | |
| "loss": 0.0228, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.16834772512880053, | |
| "grad_norm": 2.6767971515655518, | |
| "learning_rate": 4.811002964458987e-05, | |
| "loss": 0.0194, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.1689282345257964, | |
| "grad_norm": 1.0238885879516602, | |
| "learning_rate": 4.809168156246113e-05, | |
| "loss": 0.0145, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.16950874392279225, | |
| "grad_norm": 0.48919227719306946, | |
| "learning_rate": 4.807324838054792e-05, | |
| "loss": 0.0199, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.1700892533197881, | |
| "grad_norm": 1.0482101440429688, | |
| "learning_rate": 4.8054730166782035e-05, | |
| "loss": 0.0204, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.17066976271678397, | |
| "grad_norm": 0.22599902749061584, | |
| "learning_rate": 4.8036126989408666e-05, | |
| "loss": 0.0197, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.17125027211377986, | |
| "grad_norm": 0.2554647922515869, | |
| "learning_rate": 4.80174389169861e-05, | |
| "loss": 0.0178, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.17183078151077572, | |
| "grad_norm": 1.0132629871368408, | |
| "learning_rate": 4.7998666018385506e-05, | |
| "loss": 0.0172, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.17241129090777157, | |
| "grad_norm": 3.19964599609375, | |
| "learning_rate": 4.7979808362790655e-05, | |
| "loss": 0.0183, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.17299180030476743, | |
| "grad_norm": 0.5451412200927734, | |
| "learning_rate": 4.796086601969768e-05, | |
| "loss": 0.0189, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.1735723097017633, | |
| "grad_norm": 0.18623612821102142, | |
| "learning_rate": 4.7941839058914796e-05, | |
| "loss": 0.0165, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.17415281909875915, | |
| "grad_norm": 1.2114591598510742, | |
| "learning_rate": 4.792272755056207e-05, | |
| "loss": 0.0185, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.174733328495755, | |
| "grad_norm": 0.8469420075416565, | |
| "learning_rate": 4.790353156507117e-05, | |
| "loss": 0.0191, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.1753138378927509, | |
| "grad_norm": 1.4895416498184204, | |
| "learning_rate": 4.7884251173185045e-05, | |
| "loss": 0.0202, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.17589434728974676, | |
| "grad_norm": 0.26737430691719055, | |
| "learning_rate": 4.786488644595775e-05, | |
| "loss": 0.0154, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.17647485668674262, | |
| "grad_norm": 1.4356130361557007, | |
| "learning_rate": 4.7845437454754116e-05, | |
| "loss": 0.0164, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.17705536608373848, | |
| "grad_norm": 0.28000500798225403, | |
| "learning_rate": 4.782590427124952e-05, | |
| "loss": 0.0158, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.17763587548073434, | |
| "grad_norm": 2.8763926029205322, | |
| "learning_rate": 4.7806286967429606e-05, | |
| "loss": 0.0182, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.1782163848777302, | |
| "grad_norm": 1.897760272026062, | |
| "learning_rate": 4.778658561559004e-05, | |
| "loss": 0.0255, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.17879689427472606, | |
| "grad_norm": 1.896153450012207, | |
| "learning_rate": 4.776680028833623e-05, | |
| "loss": 0.0187, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.17937740367172195, | |
| "grad_norm": 0.31827715039253235, | |
| "learning_rate": 4.7746931058583035e-05, | |
| "loss": 0.0172, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.1799579130687178, | |
| "grad_norm": 2.0092689990997314, | |
| "learning_rate": 4.772697799955455e-05, | |
| "loss": 0.0156, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.18053842246571367, | |
| "grad_norm": 3.24516224861145, | |
| "learning_rate": 4.7706941184783776e-05, | |
| "loss": 0.0157, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.18111893186270953, | |
| "grad_norm": 4.248687744140625, | |
| "learning_rate": 4.768682068811241e-05, | |
| "loss": 0.0223, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.18169944125970539, | |
| "grad_norm": 1.3310073614120483, | |
| "learning_rate": 4.7666616583690525e-05, | |
| "loss": 0.0181, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.18227995065670125, | |
| "grad_norm": 0.7074719071388245, | |
| "learning_rate": 4.764632894597632e-05, | |
| "loss": 0.0165, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.18286046005369713, | |
| "grad_norm": 1.0923957824707031, | |
| "learning_rate": 4.7625957849735826e-05, | |
| "loss": 0.0209, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.183440969450693, | |
| "grad_norm": 0.4126807749271393, | |
| "learning_rate": 4.760550337004266e-05, | |
| "loss": 0.021, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.18402147884768885, | |
| "grad_norm": 0.7564171552658081, | |
| "learning_rate": 4.758496558227771e-05, | |
| "loss": 0.02, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.1846019882446847, | |
| "grad_norm": 5.621452808380127, | |
| "learning_rate": 4.756434456212892e-05, | |
| "loss": 0.0218, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.18518249764168057, | |
| "grad_norm": 0.5979048013687134, | |
| "learning_rate": 4.7543640385590925e-05, | |
| "loss": 0.018, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.18576300703867643, | |
| "grad_norm": 0.4124324917793274, | |
| "learning_rate": 4.752285312896485e-05, | |
| "loss": 0.0192, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.1863435164356723, | |
| "grad_norm": 19.721843719482422, | |
| "learning_rate": 4.750198286885797e-05, | |
| "loss": 0.0191, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.18692402583266818, | |
| "grad_norm": 1.7610396146774292, | |
| "learning_rate": 4.748102968218347e-05, | |
| "loss": 0.0205, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.18750453522966404, | |
| "grad_norm": 0.6155940890312195, | |
| "learning_rate": 4.745999364616014e-05, | |
| "loss": 0.0233, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.1880850446266599, | |
| "grad_norm": 1.3487342596054077, | |
| "learning_rate": 4.743887483831208e-05, | |
| "loss": 0.0182, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.18866555402365576, | |
| "grad_norm": 1.6848351955413818, | |
| "learning_rate": 4.741767333646846e-05, | |
| "loss": 0.0196, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.18924606342065162, | |
| "grad_norm": 0.3916856348514557, | |
| "learning_rate": 4.739638921876317e-05, | |
| "loss": 0.0157, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.18982657281764748, | |
| "grad_norm": 0.43171945214271545, | |
| "learning_rate": 4.737502256363459e-05, | |
| "loss": 0.015, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.19040708221464334, | |
| "grad_norm": 2.9373903274536133, | |
| "learning_rate": 4.735357344982525e-05, | |
| "loss": 0.0182, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.19098759161163922, | |
| "grad_norm": 1.7581216096878052, | |
| "learning_rate": 4.733204195638159e-05, | |
| "loss": 0.021, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.19156810100863508, | |
| "grad_norm": 0.5465153455734253, | |
| "learning_rate": 4.731042816265364e-05, | |
| "loss": 0.0165, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.19214861040563094, | |
| "grad_norm": 1.2427330017089844, | |
| "learning_rate": 4.72887321482947e-05, | |
| "loss": 0.0172, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.1927291198026268, | |
| "grad_norm": 1.4515526294708252, | |
| "learning_rate": 4.726695399326113e-05, | |
| "loss": 0.0166, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.19330962919962266, | |
| "grad_norm": 0.47813165187835693, | |
| "learning_rate": 4.7245093777811945e-05, | |
| "loss": 0.0165, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.19389013859661852, | |
| "grad_norm": 0.4670131206512451, | |
| "learning_rate": 4.722315158250863e-05, | |
| "loss": 0.0171, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.1944706479936144, | |
| "grad_norm": 1.1390752792358398, | |
| "learning_rate": 4.720112748821475e-05, | |
| "loss": 0.0219, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.19505115739061027, | |
| "grad_norm": 0.6364420652389526, | |
| "learning_rate": 4.7179021576095724e-05, | |
| "loss": 0.0186, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.19563166678760613, | |
| "grad_norm": 0.0812646821141243, | |
| "learning_rate": 4.7156833927618475e-05, | |
| "loss": 0.0184, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.196212176184602, | |
| "grad_norm": 0.5782191753387451, | |
| "learning_rate": 4.713456462455116e-05, | |
| "loss": 0.0212, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.19679268558159785, | |
| "grad_norm": 0.3221706449985504, | |
| "learning_rate": 4.711221374896283e-05, | |
| "loss": 0.0183, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.1973731949785937, | |
| "grad_norm": 0.4310432970523834, | |
| "learning_rate": 4.7089781383223203e-05, | |
| "loss": 0.0194, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.19795370437558957, | |
| "grad_norm": 0.4918677508831024, | |
| "learning_rate": 4.706726761000227e-05, | |
| "loss": 0.0192, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.19853421377258545, | |
| "grad_norm": 0.8517147302627563, | |
| "learning_rate": 4.704467251227006e-05, | |
| "loss": 0.0179, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.19911472316958131, | |
| "grad_norm": 0.8899397850036621, | |
| "learning_rate": 4.702199617329629e-05, | |
| "loss": 0.0216, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.19969523256657717, | |
| "grad_norm": 1.310464859008789, | |
| "learning_rate": 4.6999238676650074e-05, | |
| "loss": 0.0196, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.20027574196357303, | |
| "grad_norm": 2.9320359230041504, | |
| "learning_rate": 4.697640010619965e-05, | |
| "loss": 0.0167, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.2008562513605689, | |
| "grad_norm": 0.2071259319782257, | |
| "learning_rate": 4.6953480546111986e-05, | |
| "loss": 0.019, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.20143676075756475, | |
| "grad_norm": 5.163755893707275, | |
| "learning_rate": 4.6930480080852553e-05, | |
| "loss": 0.0147, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.2020172701545606, | |
| "grad_norm": 0.6986225843429565, | |
| "learning_rate": 4.6907398795184995e-05, | |
| "loss": 0.0248, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.2025977795515565, | |
| "grad_norm": 0.5469736456871033, | |
| "learning_rate": 4.6884236774170766e-05, | |
| "loss": 0.0147, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.20317828894855236, | |
| "grad_norm": 1.1931012868881226, | |
| "learning_rate": 4.686099410316888e-05, | |
| "loss": 0.0183, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.20375879834554822, | |
| "grad_norm": 3.972321033477783, | |
| "learning_rate": 4.6837670867835546e-05, | |
| "loss": 0.0199, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.20433930774254408, | |
| "grad_norm": 0.9743729829788208, | |
| "learning_rate": 4.681426715412392e-05, | |
| "loss": 0.0161, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.20491981713953994, | |
| "grad_norm": 1.3011478185653687, | |
| "learning_rate": 4.67907830482837e-05, | |
| "loss": 0.0177, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.2055003265365358, | |
| "grad_norm": 0.2887895405292511, | |
| "learning_rate": 4.676721863686088e-05, | |
| "loss": 0.0156, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.2060808359335317, | |
| "grad_norm": 0.5841540098190308, | |
| "learning_rate": 4.67435740066974e-05, | |
| "loss": 0.0201, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.20666134533052755, | |
| "grad_norm": 0.38853272795677185, | |
| "learning_rate": 4.671984924493081e-05, | |
| "loss": 0.0185, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.2072418547275234, | |
| "grad_norm": 2.471120834350586, | |
| "learning_rate": 4.6696044438994004e-05, | |
| "loss": 0.0201, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.20782236412451927, | |
| "grad_norm": 1.8866631984710693, | |
| "learning_rate": 4.667215967661483e-05, | |
| "loss": 0.0199, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.20840287352151513, | |
| "grad_norm": 0.6856237053871155, | |
| "learning_rate": 4.664819504581582e-05, | |
| "loss": 0.0161, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.20898338291851098, | |
| "grad_norm": 0.3085954487323761, | |
| "learning_rate": 4.662415063491384e-05, | |
| "loss": 0.0173, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.20956389231550684, | |
| "grad_norm": 0.6165205240249634, | |
| "learning_rate": 4.660002653251977e-05, | |
| "loss": 0.0184, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.21014440171250273, | |
| "grad_norm": 3.60754132270813, | |
| "learning_rate": 4.657582282753816e-05, | |
| "loss": 0.0212, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.2107249111094986, | |
| "grad_norm": 0.378738135099411, | |
| "learning_rate": 4.655153960916695e-05, | |
| "loss": 0.0247, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.21130542050649445, | |
| "grad_norm": 1.4534658193588257, | |
| "learning_rate": 4.652717696689709e-05, | |
| "loss": 0.02, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.2118859299034903, | |
| "grad_norm": 0.41120943427085876, | |
| "learning_rate": 4.6502734990512255e-05, | |
| "loss": 0.0136, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.21246643930048617, | |
| "grad_norm": 11.224574089050293, | |
| "learning_rate": 4.647821377008844e-05, | |
| "loss": 0.0208, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.21304694869748203, | |
| "grad_norm": 2.768289089202881, | |
| "learning_rate": 4.645361339599373e-05, | |
| "loss": 0.0174, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.21362745809447792, | |
| "grad_norm": 1.1909620761871338, | |
| "learning_rate": 4.6428933958887885e-05, | |
| "loss": 0.0194, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.21420796749147378, | |
| "grad_norm": 2.583638906478882, | |
| "learning_rate": 4.6404175549722055e-05, | |
| "loss": 0.0151, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.21478847688846964, | |
| "grad_norm": 1.1585899591445923, | |
| "learning_rate": 4.6379338259738414e-05, | |
| "loss": 0.019, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.2153689862854655, | |
| "grad_norm": 0.24345244467258453, | |
| "learning_rate": 4.6354422180469834e-05, | |
| "loss": 0.0158, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.21594949568246136, | |
| "grad_norm": 0.9591251611709595, | |
| "learning_rate": 4.632942740373955e-05, | |
| "loss": 0.0162, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.21653000507945722, | |
| "grad_norm": 0.12427254766225815, | |
| "learning_rate": 4.630435402166083e-05, | |
| "loss": 0.0291, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.21711051447645308, | |
| "grad_norm": 12.628028869628906, | |
| "learning_rate": 4.6279202126636624e-05, | |
| "loss": 0.0147, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.21769102387344896, | |
| "grad_norm": 0.6221101880073547, | |
| "learning_rate": 4.625397181135922e-05, | |
| "loss": 0.0188, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.21827153327044482, | |
| "grad_norm": 0.11033707112073898, | |
| "learning_rate": 4.6228663168809904e-05, | |
| "loss": 0.0141, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.21885204266744068, | |
| "grad_norm": 0.5746279954910278, | |
| "learning_rate": 4.620327629225863e-05, | |
| "loss": 0.0169, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.21943255206443654, | |
| "grad_norm": 1.458079218864441, | |
| "learning_rate": 4.6177811275263665e-05, | |
| "loss": 0.0195, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.2200130614614324, | |
| "grad_norm": 8.152167320251465, | |
| "learning_rate": 4.615226821167126e-05, | |
| "loss": 0.0155, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.22059357085842826, | |
| "grad_norm": 2.139084577560425, | |
| "learning_rate": 4.612664719561526e-05, | |
| "loss": 0.0179, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.22117408025542412, | |
| "grad_norm": 2.3556296825408936, | |
| "learning_rate": 4.610094832151681e-05, | |
| "loss": 0.0187, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.22175458965242, | |
| "grad_norm": 0.26898398995399475, | |
| "learning_rate": 4.6075171684084e-05, | |
| "loss": 0.0208, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.22233509904941587, | |
| "grad_norm": 0.16663870215415955, | |
| "learning_rate": 4.604931737831146e-05, | |
| "loss": 0.0189, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.22291560844641173, | |
| "grad_norm": 4.2696943283081055, | |
| "learning_rate": 4.60233854994801e-05, | |
| "loss": 0.0168, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.2234961178434076, | |
| "grad_norm": 1.2985143661499023, | |
| "learning_rate": 4.5997376143156654e-05, | |
| "loss": 0.0161, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.22407662724040345, | |
| "grad_norm": 0.7481074333190918, | |
| "learning_rate": 4.597128940519344e-05, | |
| "loss": 0.0132, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.2246571366373993, | |
| "grad_norm": 1.7323493957519531, | |
| "learning_rate": 4.5945125381727924e-05, | |
| "loss": 0.0147, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.2252376460343952, | |
| "grad_norm": 0.9689391851425171, | |
| "learning_rate": 4.591888416918238e-05, | |
| "loss": 0.0175, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.22581815543139105, | |
| "grad_norm": 0.9196175336837769, | |
| "learning_rate": 4.589256586426356e-05, | |
| "loss": 0.0167, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.22639866482838691, | |
| "grad_norm": 1.5890405178070068, | |
| "learning_rate": 4.586617056396234e-05, | |
| "loss": 0.0198, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.22697917422538277, | |
| "grad_norm": 4.1506829261779785, | |
| "learning_rate": 4.583969836555333e-05, | |
| "loss": 0.015, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.22755968362237863, | |
| "grad_norm": 1.0635732412338257, | |
| "learning_rate": 4.581314936659451e-05, | |
| "loss": 0.0186, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.2281401930193745, | |
| "grad_norm": 1.0515848398208618, | |
| "learning_rate": 4.578652366492695e-05, | |
| "loss": 0.0248, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.22872070241637035, | |
| "grad_norm": 0.3499925434589386, | |
| "learning_rate": 4.5759821358674346e-05, | |
| "loss": 0.0176, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.22930121181336624, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.573304254624271e-05, | |
| "loss": 0.0164, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.2298817212103621, | |
| "grad_norm": 0.6939957141876221, | |
| "learning_rate": 4.570618732632003e-05, | |
| "loss": 0.0191, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.23046223060735796, | |
| "grad_norm": 0.3061050772666931, | |
| "learning_rate": 4.5679255797875856e-05, | |
| "loss": 0.0188, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.23104274000435382, | |
| "grad_norm": 0.3814498484134674, | |
| "learning_rate": 4.565224806016095e-05, | |
| "loss": 0.0164, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.23162324940134968, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.562516421270695e-05, | |
| "loss": 0.017, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.23220375879834554, | |
| "grad_norm": 1.1685712337493896, | |
| "learning_rate": 4.559800435532596e-05, | |
| "loss": 0.018, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.2327842681953414, | |
| "grad_norm": 0.8218551874160767, | |
| "learning_rate": 4.5570768588110235e-05, | |
| "loss": 0.0162, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.23336477759233729, | |
| "grad_norm": 2.128337860107422, | |
| "learning_rate": 4.5543457011431744e-05, | |
| "loss": 0.0178, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.23394528698933315, | |
| "grad_norm": 36.10731887817383, | |
| "learning_rate": 4.5516069725941854e-05, | |
| "loss": 0.0185, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.234525796386329, | |
| "grad_norm": 1.233340859413147, | |
| "learning_rate": 4.548860683257096e-05, | |
| "loss": 0.0175, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.23510630578332486, | |
| "grad_norm": 0.9268346428871155, | |
| "learning_rate": 4.546106843252804e-05, | |
| "loss": 0.0245, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.23568681518032072, | |
| "grad_norm": 0.29465100169181824, | |
| "learning_rate": 4.54334546273004e-05, | |
| "loss": 0.0211, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.23626732457731658, | |
| "grad_norm": 0.3362191319465637, | |
| "learning_rate": 4.5405765518653204e-05, | |
| "loss": 0.0151, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.23684783397431247, | |
| "grad_norm": 0.8512314558029175, | |
| "learning_rate": 4.537800120862913e-05, | |
| "loss": 0.0162, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.23742834337130833, | |
| "grad_norm": 0.07062964141368866, | |
| "learning_rate": 4.5350161799548e-05, | |
| "loss": 0.0162, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.2380088527683042, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.5322247394006415e-05, | |
| "loss": 0.0164, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.23858936216530005, | |
| "grad_norm": 6.001936435699463, | |
| "learning_rate": 4.529425809487733e-05, | |
| "loss": 0.018, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.2391698715622959, | |
| "grad_norm": 1.6303260326385498, | |
| "learning_rate": 4.526619400530973e-05, | |
| "loss": 0.0154, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.23975038095929177, | |
| "grad_norm": 0.2997598648071289, | |
| "learning_rate": 4.523805522872822e-05, | |
| "loss": 0.0133, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.24033089035628763, | |
| "grad_norm": 0.1703944355249405, | |
| "learning_rate": 4.5209841868832635e-05, | |
| "loss": 0.0161, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.24091139975328352, | |
| "grad_norm": 2.1550252437591553, | |
| "learning_rate": 4.51815540295977e-05, | |
| "loss": 0.0148, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.24149190915027938, | |
| "grad_norm": 3.253269910812378, | |
| "learning_rate": 4.515319181527259e-05, | |
| "loss": 0.0197, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.24207241854727524, | |
| "grad_norm": 0.663278341293335, | |
| "learning_rate": 4.512475533038059e-05, | |
| "loss": 0.0152, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.2426529279442711, | |
| "grad_norm": 0.05046732723712921, | |
| "learning_rate": 4.5096244679718676e-05, | |
| "loss": 0.0207, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.24323343734126696, | |
| "grad_norm": 2.685068368911743, | |
| "learning_rate": 4.506765996835718e-05, | |
| "loss": 0.0154, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.24381394673826282, | |
| "grad_norm": 0.698753833770752, | |
| "learning_rate": 4.503900130163935e-05, | |
| "loss": 0.0161, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.2443944561352587, | |
| "grad_norm": 0.11762864887714386, | |
| "learning_rate": 4.501026878518097e-05, | |
| "loss": 0.0187, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.24497496553225456, | |
| "grad_norm": 1.077953577041626, | |
| "learning_rate": 4.498146252487002e-05, | |
| "loss": 0.0185, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.24555547492925042, | |
| "grad_norm": 0.18901602923870087, | |
| "learning_rate": 4.49525826268662e-05, | |
| "loss": 0.0153, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.24613598432624628, | |
| "grad_norm": 1.0348716974258423, | |
| "learning_rate": 4.492362919760063e-05, | |
| "loss": 0.0178, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.24671649372324214, | |
| "grad_norm": 0.6340203285217285, | |
| "learning_rate": 4.489460234377538e-05, | |
| "loss": 0.0158, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.247297003120238, | |
| "grad_norm": 1.0056567192077637, | |
| "learning_rate": 4.4865502172363126e-05, | |
| "loss": 0.0189, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.24787751251723386, | |
| "grad_norm": 2.1102306842803955, | |
| "learning_rate": 4.483632879060676e-05, | |
| "loss": 0.0158, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.24845802191422975, | |
| "grad_norm": 0.1478302925825119, | |
| "learning_rate": 4.480708230601895e-05, | |
| "loss": 0.0166, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.2490385313112256, | |
| "grad_norm": 0.5817315578460693, | |
| "learning_rate": 4.4777762826381775e-05, | |
| "loss": 0.0202, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.24961904070822147, | |
| "grad_norm": 2.631985664367676, | |
| "learning_rate": 4.4748370459746334e-05, | |
| "loss": 0.0151, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.2501995501052173, | |
| "grad_norm": 0.3138137459754944, | |
| "learning_rate": 4.471890531443232e-05, | |
| "loss": 0.0188, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.2507800595022132, | |
| "grad_norm": 0.6783995628356934, | |
| "learning_rate": 4.4689367499027654e-05, | |
| "loss": 0.0195, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.25136056889920905, | |
| "grad_norm": 2.4091744422912598, | |
| "learning_rate": 4.4659757122388066e-05, | |
| "loss": 0.0158, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.25194107829620493, | |
| "grad_norm": 1.260305643081665, | |
| "learning_rate": 4.463007429363668e-05, | |
| "loss": 0.0186, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.25252158769320077, | |
| "grad_norm": 1.3665226697921753, | |
| "learning_rate": 4.460031912216363e-05, | |
| "loss": 0.0163, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.25310209709019665, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.457049171762568e-05, | |
| "loss": 0.0163, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.2536826064871925, | |
| "grad_norm": 0.5342715382575989, | |
| "learning_rate": 4.454059218994577e-05, | |
| "loss": 0.0164, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.2542631158841884, | |
| "grad_norm": 0.4309409558773041, | |
| "learning_rate": 4.4510620649312643e-05, | |
| "loss": 0.0182, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.25484362528118426, | |
| "grad_norm": 0.212936669588089, | |
| "learning_rate": 4.4480577206180436e-05, | |
| "loss": 0.0145, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.2554241346781801, | |
| "grad_norm": 2.9309751987457275, | |
| "learning_rate": 4.4450461971268256e-05, | |
| "loss": 0.0152, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.256004644075176, | |
| "grad_norm": 6.400189399719238, | |
| "learning_rate": 4.4420275055559795e-05, | |
| "loss": 0.0172, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.2565851534721718, | |
| "grad_norm": 0.43217283487319946, | |
| "learning_rate": 4.43900165703029e-05, | |
| "loss": 0.0223, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.2571656628691677, | |
| "grad_norm": 0.4472719132900238, | |
| "learning_rate": 4.4359686627009204e-05, | |
| "loss": 0.0166, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.25774617226616353, | |
| "grad_norm": 0.8468680381774902, | |
| "learning_rate": 4.432928533745364e-05, | |
| "loss": 0.0267, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.2583266816631594, | |
| "grad_norm": 0.5906082987785339, | |
| "learning_rate": 4.4298812813674096e-05, | |
| "loss": 0.019, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.2589071910601553, | |
| "grad_norm": 0.6340333819389343, | |
| "learning_rate": 4.4268269167970977e-05, | |
| "loss": 0.015, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.25948770045715114, | |
| "grad_norm": 3.455953598022461, | |
| "learning_rate": 4.42376545129068e-05, | |
| "loss": 0.0179, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.260068209854147, | |
| "grad_norm": 1.7209389209747314, | |
| "learning_rate": 4.420696896130576e-05, | |
| "loss": 0.0182, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.26064871925114286, | |
| "grad_norm": 0.23180946707725525, | |
| "learning_rate": 4.417621262625334e-05, | |
| "loss": 0.0234, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.26122922864813874, | |
| "grad_norm": 1.2846555709838867, | |
| "learning_rate": 4.414538562109588e-05, | |
| "loss": 0.0185, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.26180973804513463, | |
| "grad_norm": 1.6205638647079468, | |
| "learning_rate": 4.411448805944015e-05, | |
| "loss": 0.0164, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.26239024744213046, | |
| "grad_norm": 1.5716558694839478, | |
| "learning_rate": 4.408352005515295e-05, | |
| "loss": 0.0152, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.26297075683912635, | |
| "grad_norm": 2.2886829376220703, | |
| "learning_rate": 4.4052481722360675e-05, | |
| "loss": 0.0124, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.2635512662361222, | |
| "grad_norm": 0.44392430782318115, | |
| "learning_rate": 4.402137317544891e-05, | |
| "loss": 0.0182, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.26413177563311807, | |
| "grad_norm": 1.4996153116226196, | |
| "learning_rate": 4.399019452906199e-05, | |
| "loss": 0.0181, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.2647122850301139, | |
| "grad_norm": 0.37333735823631287, | |
| "learning_rate": 4.395894589810261e-05, | |
| "loss": 0.0187, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.2652927944271098, | |
| "grad_norm": 1.011172890663147, | |
| "learning_rate": 4.392762739773135e-05, | |
| "loss": 0.0132, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.2658733038241057, | |
| "grad_norm": 0.7283264398574829, | |
| "learning_rate": 4.389623914336631e-05, | |
| "loss": 0.0212, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.2664538132211015, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.386478125068262e-05, | |
| "loss": 0.0191, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.2670343226180974, | |
| "grad_norm": 0.8834022879600525, | |
| "learning_rate": 4.3833253835612074e-05, | |
| "loss": 0.018, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.26761483201509323, | |
| "grad_norm": 0.6831231713294983, | |
| "learning_rate": 4.380165701434267e-05, | |
| "loss": 0.0145, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.2681953414120891, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.376999090331818e-05, | |
| "loss": 0.0154, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.26877585080908495, | |
| "grad_norm": 0.10557834059000015, | |
| "learning_rate": 4.3738255619237745e-05, | |
| "loss": 0.0124, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.26935636020608084, | |
| "grad_norm": 1.9961583614349365, | |
| "learning_rate": 4.370645127905542e-05, | |
| "loss": 0.0208, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.2699368696030767, | |
| "grad_norm": 0.8881611824035645, | |
| "learning_rate": 4.367457799997976e-05, | |
| "loss": 0.0132, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.27051737900007256, | |
| "grad_norm": 0.7779310345649719, | |
| "learning_rate": 4.3642635899473364e-05, | |
| "loss": 0.0207, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.27109788839706844, | |
| "grad_norm": 1.000813364982605, | |
| "learning_rate": 4.3610625095252474e-05, | |
| "loss": 0.0217, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.2716783977940643, | |
| "grad_norm": 0.2656024396419525, | |
| "learning_rate": 4.357854570528652e-05, | |
| "loss": 0.0197, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.27225890719106016, | |
| "grad_norm": 0.3503284156322479, | |
| "learning_rate": 4.3546397847797695e-05, | |
| "loss": 0.0155, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.272839416588056, | |
| "grad_norm": 2.816612482070923, | |
| "learning_rate": 4.3514181641260515e-05, | |
| "loss": 0.0196, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.2734199259850519, | |
| "grad_norm": 0.44417452812194824, | |
| "learning_rate": 4.3481897204401376e-05, | |
| "loss": 0.0164, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.27400043538204777, | |
| "grad_norm": 1.1510508060455322, | |
| "learning_rate": 4.3449544656198123e-05, | |
| "loss": 0.0155, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.2745809447790436, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.3417124115879623e-05, | |
| "loss": 0.0151, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.2751614541760395, | |
| "grad_norm": 0.687237024307251, | |
| "learning_rate": 4.3384635702925315e-05, | |
| "loss": 0.0161, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.2757419635730353, | |
| "grad_norm": 15.054317474365234, | |
| "learning_rate": 4.335207953706475e-05, | |
| "loss": 0.0159, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.2763224729700312, | |
| "grad_norm": 0.6675468683242798, | |
| "learning_rate": 4.3319455738277184e-05, | |
| "loss": 0.0226, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.27690298236702704, | |
| "grad_norm": 0.44973939657211304, | |
| "learning_rate": 4.328676442679112e-05, | |
| "loss": 0.0161, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.2774834917640229, | |
| "grad_norm": 0.5629274249076843, | |
| "learning_rate": 4.3254005723083855e-05, | |
| "loss": 0.0145, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.2780640011610188, | |
| "grad_norm": 1.0498775243759155, | |
| "learning_rate": 4.322117974788107e-05, | |
| "loss": 0.0166, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.27864451055801465, | |
| "grad_norm": 0.404738187789917, | |
| "learning_rate": 4.318828662215633e-05, | |
| "loss": 0.0148, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.27922501995501053, | |
| "grad_norm": 0.3776521384716034, | |
| "learning_rate": 4.3155326467130696e-05, | |
| "loss": 0.0208, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.27980552935200637, | |
| "grad_norm": 0.3664938807487488, | |
| "learning_rate": 4.312229940427224e-05, | |
| "loss": 0.014, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.28038603874900225, | |
| "grad_norm": 0.41216275095939636, | |
| "learning_rate": 4.308920555529561e-05, | |
| "loss": 0.0159, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.28096654814599814, | |
| "grad_norm": 0.7025476694107056, | |
| "learning_rate": 4.305604504216157e-05, | |
| "loss": 0.0144, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.281547057542994, | |
| "grad_norm": 0.5805770754814148, | |
| "learning_rate": 4.3022817987076615e-05, | |
| "loss": 0.0206, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.28212756693998986, | |
| "grad_norm": 0.10411791503429413, | |
| "learning_rate": 4.298952451249238e-05, | |
| "loss": 0.0139, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.2827080763369857, | |
| "grad_norm": 1.3499836921691895, | |
| "learning_rate": 4.295616474110534e-05, | |
| "loss": 0.0168, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.2832885857339816, | |
| "grad_norm": 0.8422473073005676, | |
| "learning_rate": 4.292273879585628e-05, | |
| "loss": 0.0169, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.2838690951309774, | |
| "grad_norm": 1.0992027521133423, | |
| "learning_rate": 4.288924679992985e-05, | |
| "loss": 0.0179, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.2844496045279733, | |
| "grad_norm": 0.9558140635490417, | |
| "learning_rate": 4.2855688876754104e-05, | |
| "loss": 0.0162, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.2850301139249692, | |
| "grad_norm": 2.5504753589630127, | |
| "learning_rate": 4.2822065150000105e-05, | |
| "loss": 0.0125, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.285610623321965, | |
| "grad_norm": 6.963260650634766, | |
| "learning_rate": 4.278837574358134e-05, | |
| "loss": 0.0145, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.2861911327189609, | |
| "grad_norm": 0.19555258750915527, | |
| "learning_rate": 4.275462078165343e-05, | |
| "loss": 0.0144, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.28677164211595674, | |
| "grad_norm": 0.13861818611621857, | |
| "learning_rate": 4.2720800388613545e-05, | |
| "loss": 0.015, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.2873521515129526, | |
| "grad_norm": 0.10115107148885727, | |
| "learning_rate": 4.2686914689099986e-05, | |
| "loss": 0.0208, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.28793266090994846, | |
| "grad_norm": 0.12271959334611893, | |
| "learning_rate": 4.265296380799174e-05, | |
| "loss": 0.0177, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.28851317030694434, | |
| "grad_norm": 0.6255984902381897, | |
| "learning_rate": 4.261894787040801e-05, | |
| "loss": 0.0142, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.28909367970394023, | |
| "grad_norm": 0.24981549382209778, | |
| "learning_rate": 4.258486700170774e-05, | |
| "loss": 0.0129, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.28967418910093606, | |
| "grad_norm": 0.42702168226242065, | |
| "learning_rate": 4.2550721327489165e-05, | |
| "loss": 0.0197, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.29025469849793195, | |
| "grad_norm": 0.2005091905593872, | |
| "learning_rate": 4.2516510973589366e-05, | |
| "loss": 0.0165, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.2908352078949278, | |
| "grad_norm": 0.18545077741146088, | |
| "learning_rate": 4.248223606608378e-05, | |
| "loss": 0.0197, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.29141571729192367, | |
| "grad_norm": 2.60361385345459, | |
| "learning_rate": 4.244789673128572e-05, | |
| "loss": 0.0208, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.2919962266889195, | |
| "grad_norm": 1.1765265464782715, | |
| "learning_rate": 4.241349309574596e-05, | |
| "loss": 0.0161, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.2925767360859154, | |
| "grad_norm": 0.3382522463798523, | |
| "learning_rate": 4.237902528625224e-05, | |
| "loss": 0.0164, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.2931572454829113, | |
| "grad_norm": 0.8997277021408081, | |
| "learning_rate": 4.234449342982879e-05, | |
| "loss": 0.0173, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.2937377548799071, | |
| "grad_norm": 0.3323515057563782, | |
| "learning_rate": 4.230989765373587e-05, | |
| "loss": 0.0156, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.294318264276903, | |
| "grad_norm": 0.0954294502735138, | |
| "learning_rate": 4.2275238085469326e-05, | |
| "loss": 0.0171, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.29489877367389883, | |
| "grad_norm": 0.3944256007671356, | |
| "learning_rate": 4.224051485276006e-05, | |
| "loss": 0.0147, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.2954792830708947, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.220572808357363e-05, | |
| "loss": 0.0178, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.29605979246789055, | |
| "grad_norm": 1.3922127485275269, | |
| "learning_rate": 4.217087790610973e-05, | |
| "loss": 0.016, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.29664030186488644, | |
| "grad_norm": 0.48834431171417236, | |
| "learning_rate": 4.213596444880173e-05, | |
| "loss": 0.013, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.2972208112618823, | |
| "grad_norm": 1.1236047744750977, | |
| "learning_rate": 4.210098784031621e-05, | |
| "loss": 0.0177, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.29780132065887815, | |
| "grad_norm": 0.22413845360279083, | |
| "learning_rate": 4.206594820955249e-05, | |
| "loss": 0.0195, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.29838183005587404, | |
| "grad_norm": 1.6964247226715088, | |
| "learning_rate": 4.2030845685642136e-05, | |
| "loss": 0.0171, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.2989623394528699, | |
| "grad_norm": 0.4666268825531006, | |
| "learning_rate": 4.199568039794848e-05, | |
| "loss": 0.0181, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.29954284884986576, | |
| "grad_norm": 0.7793068289756775, | |
| "learning_rate": 4.196045247606619e-05, | |
| "loss": 0.0149, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.3001233582468616, | |
| "grad_norm": 0.6577598452568054, | |
| "learning_rate": 4.192516204982073e-05, | |
| "loss": 0.0215, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.3007038676438575, | |
| "grad_norm": 0.31358566880226135, | |
| "learning_rate": 4.188980924926794e-05, | |
| "loss": 0.0208, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.30128437704085337, | |
| "grad_norm": 0.1279175728559494, | |
| "learning_rate": 4.1854394204693495e-05, | |
| "loss": 0.0132, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.3018648864378492, | |
| "grad_norm": 1.288215160369873, | |
| "learning_rate": 4.1818917046612474e-05, | |
| "loss": 0.0196, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.3024453958348451, | |
| "grad_norm": 0.7573376893997192, | |
| "learning_rate": 4.178337790576888e-05, | |
| "loss": 0.018, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.3030259052318409, | |
| "grad_norm": 0.07305464148521423, | |
| "learning_rate": 4.1747776913135115e-05, | |
| "loss": 0.0144, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.3036064146288368, | |
| "grad_norm": 0.41413378715515137, | |
| "learning_rate": 4.1712114199911534e-05, | |
| "loss": 0.0195, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.3041869240258327, | |
| "grad_norm": 0.6894093155860901, | |
| "learning_rate": 4.1676389897525946e-05, | |
| "loss": 0.0147, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.3047674334228285, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.1640604137633144e-05, | |
| "loss": 0.0191, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.3053479428198244, | |
| "grad_norm": 0.42625299096107483, | |
| "learning_rate": 4.16047570521144e-05, | |
| "loss": 0.0143, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.30592845221682025, | |
| "grad_norm": 0.7680391073226929, | |
| "learning_rate": 4.156884877307701e-05, | |
| "loss": 0.0141, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.30650896161381613, | |
| "grad_norm": 0.7309791445732117, | |
| "learning_rate": 4.1532879432853744e-05, | |
| "loss": 0.0142, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.30708947101081197, | |
| "grad_norm": 0.5670241117477417, | |
| "learning_rate": 4.149684916400246e-05, | |
| "loss": 0.016, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.30766998040780785, | |
| "grad_norm": 1.2159571647644043, | |
| "learning_rate": 4.146075809930549e-05, | |
| "loss": 0.0192, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.30825048980480374, | |
| "grad_norm": 1.0170856714248657, | |
| "learning_rate": 4.142460637176928e-05, | |
| "loss": 0.0139, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.30883099920179957, | |
| "grad_norm": 0.7249845266342163, | |
| "learning_rate": 4.138839411462379e-05, | |
| "loss": 0.0162, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.30941150859879546, | |
| "grad_norm": 0.25050070881843567, | |
| "learning_rate": 4.1352121461322065e-05, | |
| "loss": 0.0164, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.3099920179957913, | |
| "grad_norm": 0.68352210521698, | |
| "learning_rate": 4.131578854553976e-05, | |
| "loss": 0.0175, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.3105725273927872, | |
| "grad_norm": 0.3992615342140198, | |
| "learning_rate": 4.1279395501174544e-05, | |
| "loss": 0.0178, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.311153036789783, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.1242942462345744e-05, | |
| "loss": 0.0192, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.3117335461867789, | |
| "grad_norm": 1.5646060705184937, | |
| "learning_rate": 4.1206429563393765e-05, | |
| "loss": 0.0185, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.3123140555837748, | |
| "grad_norm": 0.9312039613723755, | |
| "learning_rate": 4.11698569388796e-05, | |
| "loss": 0.0136, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.3128945649807706, | |
| "grad_norm": 1.540569543838501, | |
| "learning_rate": 4.113322472358436e-05, | |
| "loss": 0.0188, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.3134750743777665, | |
| "grad_norm": 0.13842260837554932, | |
| "learning_rate": 4.109653305250877e-05, | |
| "loss": 0.0142, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.31405558377476234, | |
| "grad_norm": 1.753185510635376, | |
| "learning_rate": 4.105978206087265e-05, | |
| "loss": 0.0188, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.3146360931717582, | |
| "grad_norm": 0.4443669319152832, | |
| "learning_rate": 4.102297188411446e-05, | |
| "loss": 0.0165, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.31521660256875406, | |
| "grad_norm": 0.6722429990768433, | |
| "learning_rate": 4.0986102657890744e-05, | |
| "loss": 0.0192, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.31579711196574994, | |
| "grad_norm": 0.7747224569320679, | |
| "learning_rate": 4.09491745180757e-05, | |
| "loss": 0.0169, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.31637762136274583, | |
| "grad_norm": 0.2598312795162201, | |
| "learning_rate": 4.09121876007606e-05, | |
| "loss": 0.0172, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.31695813075974166, | |
| "grad_norm": 0.11571415513753891, | |
| "learning_rate": 4.087514204225336e-05, | |
| "loss": 0.013, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.31753864015673755, | |
| "grad_norm": 0.6480383276939392, | |
| "learning_rate": 4.0838037979077976e-05, | |
| "loss": 0.0182, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.3181191495537334, | |
| "grad_norm": 0.6114629507064819, | |
| "learning_rate": 4.080087554797408e-05, | |
| "loss": 0.0206, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.31869965895072927, | |
| "grad_norm": 0.8482924699783325, | |
| "learning_rate": 4.076365488589641e-05, | |
| "loss": 0.0229, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.3192801683477251, | |
| "grad_norm": 0.37672215700149536, | |
| "learning_rate": 4.072637613001426e-05, | |
| "loss": 0.0188, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.319860677744721, | |
| "grad_norm": 0.7157580256462097, | |
| "learning_rate": 4.0689039417711075e-05, | |
| "loss": 0.0176, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.3204411871417169, | |
| "grad_norm": 1.3182079792022705, | |
| "learning_rate": 4.065164488658383e-05, | |
| "loss": 0.0183, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.3210216965387127, | |
| "grad_norm": 17.849079132080078, | |
| "learning_rate": 4.061419267444263e-05, | |
| "loss": 0.0147, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.3216022059357086, | |
| "grad_norm": 2.8836376667022705, | |
| "learning_rate": 4.057668291931012e-05, | |
| "loss": 0.0164, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.32218271533270443, | |
| "grad_norm": 2.2379231452941895, | |
| "learning_rate": 4.0539115759421016e-05, | |
| "loss": 0.0155, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.3227632247297003, | |
| "grad_norm": 0.6565619707107544, | |
| "learning_rate": 4.050149133322158e-05, | |
| "loss": 0.0158, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.3233437341266962, | |
| "grad_norm": 9.40256404876709, | |
| "learning_rate": 4.046380977936915e-05, | |
| "loss": 0.0161, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.32392424352369203, | |
| "grad_norm": 1.0256812572479248, | |
| "learning_rate": 4.042607123673156e-05, | |
| "loss": 0.0185, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.3245047529206879, | |
| "grad_norm": 0.3539896011352539, | |
| "learning_rate": 4.038827584438668e-05, | |
| "loss": 0.013, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.32508526231768375, | |
| "grad_norm": 0.3358542323112488, | |
| "learning_rate": 4.035042374162189e-05, | |
| "loss": 0.0179, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.32566577171467964, | |
| "grad_norm": 0.6049757599830627, | |
| "learning_rate": 4.0312515067933545e-05, | |
| "loss": 0.0158, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.3262462811116755, | |
| "grad_norm": 0.1417369246482849, | |
| "learning_rate": 4.027454996302652e-05, | |
| "loss": 0.019, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.32682679050867136, | |
| "grad_norm": 1.0133875608444214, | |
| "learning_rate": 4.023652856681363e-05, | |
| "loss": 0.0145, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.32740729990566725, | |
| "grad_norm": 5.445352554321289, | |
| "learning_rate": 4.019845101941512e-05, | |
| "loss": 0.0202, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.3279878093026631, | |
| "grad_norm": 2.076885223388672, | |
| "learning_rate": 4.0160317461158213e-05, | |
| "loss": 0.0138, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.32856831869965897, | |
| "grad_norm": 0.7435348033905029, | |
| "learning_rate": 4.0122128032576524e-05, | |
| "loss": 0.0163, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.3291488280966548, | |
| "grad_norm": 0.662987470626831, | |
| "learning_rate": 4.0083882874409576e-05, | |
| "loss": 0.0179, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.3297293374936507, | |
| "grad_norm": 0.7310676574707031, | |
| "learning_rate": 4.004558212760227e-05, | |
| "loss": 0.0136, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.3303098468906465, | |
| "grad_norm": 1.144674301147461, | |
| "learning_rate": 4.0007225933304344e-05, | |
| "loss": 0.0183, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.3308903562876424, | |
| "grad_norm": 0.7550173997879028, | |
| "learning_rate": 3.9968814432869914e-05, | |
| "loss": 0.0125, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.3314708656846383, | |
| "grad_norm": 0.5192617774009705, | |
| "learning_rate": 3.993034776785691e-05, | |
| "loss": 0.014, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.3320513750816341, | |
| "grad_norm": 0.10176233947277069, | |
| "learning_rate": 3.9891826080026535e-05, | |
| "loss": 0.0148, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.33263188447863, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.9853249511342786e-05, | |
| "loss": 0.0153, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.33321239387562585, | |
| "grad_norm": 0.5603938698768616, | |
| "learning_rate": 3.981461820397191e-05, | |
| "loss": 0.0153, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.33379290327262173, | |
| "grad_norm": 0.9487095475196838, | |
| "learning_rate": 3.977593230028188e-05, | |
| "loss": 0.0158, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.33437341266961756, | |
| "grad_norm": 4.972527027130127, | |
| "learning_rate": 3.973719194284188e-05, | |
| "loss": 0.016, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.33495392206661345, | |
| "grad_norm": 0.1742544323205948, | |
| "learning_rate": 3.969839727442175e-05, | |
| "loss": 0.017, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.33553443146360934, | |
| "grad_norm": 0.43199607729911804, | |
| "learning_rate": 3.965954843799152e-05, | |
| "loss": 0.0156, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.33611494086060517, | |
| "grad_norm": 2.0231590270996094, | |
| "learning_rate": 3.9620645576720815e-05, | |
| "loss": 0.0173, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.33669545025760106, | |
| "grad_norm": 1.236526608467102, | |
| "learning_rate": 3.9581688833978375e-05, | |
| "loss": 0.0171, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.3372759596545969, | |
| "grad_norm": 1.1368087530136108, | |
| "learning_rate": 3.954267835333148e-05, | |
| "loss": 0.0118, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.3378564690515928, | |
| "grad_norm": 0.8430467844009399, | |
| "learning_rate": 3.9503614278545494e-05, | |
| "loss": 0.0141, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.3384369784485886, | |
| "grad_norm": 0.19449672102928162, | |
| "learning_rate": 3.946449675358327e-05, | |
| "loss": 0.0158, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.3390174878455845, | |
| "grad_norm": 0.10014590620994568, | |
| "learning_rate": 3.9425325922604615e-05, | |
| "loss": 0.0152, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.3395979972425804, | |
| "grad_norm": 0.4984476864337921, | |
| "learning_rate": 3.938610192996584e-05, | |
| "loss": 0.0164, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.3401785066395762, | |
| "grad_norm": 2.3255436420440674, | |
| "learning_rate": 3.934682492021913e-05, | |
| "loss": 0.0181, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.3407590160365721, | |
| "grad_norm": 1.8835875988006592, | |
| "learning_rate": 3.930749503811206e-05, | |
| "loss": 0.012, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.34133952543356794, | |
| "grad_norm": 1.3894046545028687, | |
| "learning_rate": 3.9268112428587074e-05, | |
| "loss": 0.015, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.3419200348305638, | |
| "grad_norm": 0.15835818648338318, | |
| "learning_rate": 3.922867723678091e-05, | |
| "loss": 0.0166, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.3425005442275597, | |
| "grad_norm": 0.3661365807056427, | |
| "learning_rate": 3.918918960802411e-05, | |
| "loss": 0.0162, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.34308105362455554, | |
| "grad_norm": 0.11089111864566803, | |
| "learning_rate": 3.914964968784044e-05, | |
| "loss": 0.0232, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.34366156302155143, | |
| "grad_norm": 2.527754306793213, | |
| "learning_rate": 3.911005762194639e-05, | |
| "loss": 0.0147, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.34424207241854726, | |
| "grad_norm": 0.15072380006313324, | |
| "learning_rate": 3.9070413556250616e-05, | |
| "loss": 0.0189, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.34482258181554315, | |
| "grad_norm": 0.5700109004974365, | |
| "learning_rate": 3.903071763685342e-05, | |
| "loss": 0.0151, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.345403091212539, | |
| "grad_norm": 0.8213745951652527, | |
| "learning_rate": 3.899097001004618e-05, | |
| "loss": 0.0167, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.34598360060953487, | |
| "grad_norm": 1.022154450416565, | |
| "learning_rate": 3.895117082231085e-05, | |
| "loss": 0.0146, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.34656411000653076, | |
| "grad_norm": 0.2379520982503891, | |
| "learning_rate": 3.891132022031939e-05, | |
| "loss": 0.0179, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.3471446194035266, | |
| "grad_norm": 0.835014283657074, | |
| "learning_rate": 3.8871418350933256e-05, | |
| "loss": 0.0145, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.3477251288005225, | |
| "grad_norm": 5.786501884460449, | |
| "learning_rate": 3.8831465361202794e-05, | |
| "loss": 0.0145, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.3483056381975183, | |
| "grad_norm": 0.40479740500450134, | |
| "learning_rate": 3.87914613983668e-05, | |
| "loss": 0.0175, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.3488861475945142, | |
| "grad_norm": 0.2653241753578186, | |
| "learning_rate": 3.875140660985189e-05, | |
| "loss": 0.0156, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.34946665699151, | |
| "grad_norm": 0.2719464600086212, | |
| "learning_rate": 3.8711301143272004e-05, | |
| "loss": 0.0122, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.3500471663885059, | |
| "grad_norm": 0.23439522087574005, | |
| "learning_rate": 3.8671145146427825e-05, | |
| "loss": 0.0169, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.3506276757855018, | |
| "grad_norm": 0.06842320412397385, | |
| "learning_rate": 3.8630938767306256e-05, | |
| "loss": 0.0141, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.35120818518249763, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.85906821540799e-05, | |
| "loss": 0.0144, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.3517886945794935, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.855037545510648e-05, | |
| "loss": 0.017, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.35236920397648935, | |
| "grad_norm": 0.27966034412384033, | |
| "learning_rate": 3.851001881892827e-05, | |
| "loss": 0.0197, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.35294971337348524, | |
| "grad_norm": 2.5935139656066895, | |
| "learning_rate": 3.846961239427161e-05, | |
| "loss": 0.0164, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.3535302227704811, | |
| "grad_norm": 0.5523900985717773, | |
| "learning_rate": 3.842915633004632e-05, | |
| "loss": 0.0186, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.35411073216747696, | |
| "grad_norm": 7.492378234863281, | |
| "learning_rate": 3.8388650775345144e-05, | |
| "loss": 0.0182, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.35469124156447285, | |
| "grad_norm": 0.31653234362602234, | |
| "learning_rate": 3.8348095879443226e-05, | |
| "loss": 0.0145, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.3552717509614687, | |
| "grad_norm": 1.4802820682525635, | |
| "learning_rate": 3.830749179179752e-05, | |
| "loss": 0.015, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.35585226035846457, | |
| "grad_norm": 0.4932232201099396, | |
| "learning_rate": 3.8266838662046334e-05, | |
| "loss": 0.0133, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.3564327697554604, | |
| "grad_norm": 3.2751312255859375, | |
| "learning_rate": 3.822613664000862e-05, | |
| "loss": 0.0155, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.3570132791524563, | |
| "grad_norm": 0.8037987351417542, | |
| "learning_rate": 3.818538587568359e-05, | |
| "loss": 0.0196, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.3575937885494521, | |
| "grad_norm": 0.5324920415878296, | |
| "learning_rate": 3.8144586519250044e-05, | |
| "loss": 0.0161, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.358174297946448, | |
| "grad_norm": 0.251559317111969, | |
| "learning_rate": 3.8103738721065856e-05, | |
| "loss": 0.014, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.3587548073434439, | |
| "grad_norm": 2.9223034381866455, | |
| "learning_rate": 3.806284263166745e-05, | |
| "loss": 0.0119, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.3593353167404397, | |
| "grad_norm": 0.4311857521533966, | |
| "learning_rate": 3.8021898401769205e-05, | |
| "loss": 0.0149, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.3599158261374356, | |
| "grad_norm": 0.5429189801216125, | |
| "learning_rate": 3.7980906182262893e-05, | |
| "loss": 0.0211, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.36049633553443144, | |
| "grad_norm": 1.3071308135986328, | |
| "learning_rate": 3.793986612421717e-05, | |
| "loss": 0.0132, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.36107684493142733, | |
| "grad_norm": 0.4390534460544586, | |
| "learning_rate": 3.789877837887698e-05, | |
| "loss": 0.0165, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.36165735432842316, | |
| "grad_norm": 42.51701354980469, | |
| "learning_rate": 3.7857643097663006e-05, | |
| "loss": 0.0151, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.36223786372541905, | |
| "grad_norm": 1.4220099449157715, | |
| "learning_rate": 3.7816460432171135e-05, | |
| "loss": 0.014, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.36281837312241494, | |
| "grad_norm": 0.5632671117782593, | |
| "learning_rate": 3.777523053417184e-05, | |
| "loss": 0.0168, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.36339888251941077, | |
| "grad_norm": 0.7514089345932007, | |
| "learning_rate": 3.7733953555609696e-05, | |
| "loss": 0.0171, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.36397939191640666, | |
| "grad_norm": 0.2681547701358795, | |
| "learning_rate": 3.769262964860276e-05, | |
| "loss": 0.0134, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.3645599013134025, | |
| "grad_norm": 1.961303472518921, | |
| "learning_rate": 3.765125896544206e-05, | |
| "loss": 0.0201, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.3651404107103984, | |
| "grad_norm": 0.6701322197914124, | |
| "learning_rate": 3.7609841658590985e-05, | |
| "loss": 0.0159, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.36572092010739427, | |
| "grad_norm": 0.3040947914123535, | |
| "learning_rate": 3.756837788068475e-05, | |
| "loss": 0.0157, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.3663014295043901, | |
| "grad_norm": 0.28461697697639465, | |
| "learning_rate": 3.7526867784529835e-05, | |
| "loss": 0.0172, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.366881938901386, | |
| "grad_norm": 1.9548020362854004, | |
| "learning_rate": 3.7485311523103427e-05, | |
| "loss": 0.0199, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.3674624482983818, | |
| "grad_norm": 0.6341608762741089, | |
| "learning_rate": 3.744370924955282e-05, | |
| "loss": 0.0206, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.3680429576953777, | |
| "grad_norm": 0.3764314651489258, | |
| "learning_rate": 3.7402061117194915e-05, | |
| "loss": 0.0151, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.36862346709237354, | |
| "grad_norm": 0.3538680076599121, | |
| "learning_rate": 3.7360367279515565e-05, | |
| "loss": 0.0157, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.3692039764893694, | |
| "grad_norm": 0.1386338174343109, | |
| "learning_rate": 3.731862789016911e-05, | |
| "loss": 0.0147, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.3697844858863653, | |
| "grad_norm": 0.5350472331047058, | |
| "learning_rate": 3.7276843102977725e-05, | |
| "loss": 0.0128, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.37036499528336114, | |
| "grad_norm": 1.673578143119812, | |
| "learning_rate": 3.723501307193091e-05, | |
| "loss": 0.0138, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.37094550468035703, | |
| "grad_norm": 0.18305979669094086, | |
| "learning_rate": 3.719313795118491e-05, | |
| "loss": 0.0198, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.37152601407735286, | |
| "grad_norm": 0.8609408736228943, | |
| "learning_rate": 3.7151217895062105e-05, | |
| "loss": 0.0188, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.37210652347434875, | |
| "grad_norm": 0.28579720854759216, | |
| "learning_rate": 3.710925305805051e-05, | |
| "loss": 0.0163, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.3726870328713446, | |
| "grad_norm": 0.5904589295387268, | |
| "learning_rate": 3.706724359480316e-05, | |
| "loss": 0.0156, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.37326754226834047, | |
| "grad_norm": 1.1671172380447388, | |
| "learning_rate": 3.7025189660137535e-05, | |
| "loss": 0.0157, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.37384805166533636, | |
| "grad_norm": 0.12482750415802002, | |
| "learning_rate": 3.698309140903504e-05, | |
| "loss": 0.0143, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.3744285610623322, | |
| "grad_norm": 2.280238151550293, | |
| "learning_rate": 3.694094899664037e-05, | |
| "loss": 0.0142, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.3750090704593281, | |
| "grad_norm": 0.1061575785279274, | |
| "learning_rate": 3.689876257826096e-05, | |
| "loss": 0.0228, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.3755895798563239, | |
| "grad_norm": 0.531701385974884, | |
| "learning_rate": 3.685653230936646e-05, | |
| "loss": 0.0193, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.3761700892533198, | |
| "grad_norm": 0.49610480666160583, | |
| "learning_rate": 3.681425834558808e-05, | |
| "loss": 0.0182, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.3767505986503156, | |
| "grad_norm": 2.2673187255859375, | |
| "learning_rate": 3.67719408427181e-05, | |
| "loss": 0.0153, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.3773311080473115, | |
| "grad_norm": 0.8174604177474976, | |
| "learning_rate": 3.672957995670921e-05, | |
| "loss": 0.0152, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.3779116174443074, | |
| "grad_norm": 2.4207687377929688, | |
| "learning_rate": 3.668717584367401e-05, | |
| "loss": 0.0135, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.37849212684130323, | |
| "grad_norm": 0.7421271204948425, | |
| "learning_rate": 3.664472865988441e-05, | |
| "loss": 0.0171, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.3790726362382991, | |
| "grad_norm": 0.5504394173622131, | |
| "learning_rate": 3.660223856177102e-05, | |
| "loss": 0.0171, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.37965314563529495, | |
| "grad_norm": 0.41542255878448486, | |
| "learning_rate": 3.655970570592262e-05, | |
| "loss": 0.0118, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.38023365503229084, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.651713024908556e-05, | |
| "loss": 0.012, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.3808141644292867, | |
| "grad_norm": 0.054853569716215134, | |
| "learning_rate": 3.64745123481632e-05, | |
| "loss": 0.0201, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.38139467382628256, | |
| "grad_norm": 1.7980788946151733, | |
| "learning_rate": 3.643185216021531e-05, | |
| "loss": 0.0114, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.38197518322327845, | |
| "grad_norm": 0.3637460470199585, | |
| "learning_rate": 3.6389149842457486e-05, | |
| "loss": 0.0158, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.3825556926202743, | |
| "grad_norm": 1.9792327880859375, | |
| "learning_rate": 3.634640555226062e-05, | |
| "loss": 0.0156, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.38313620201727017, | |
| "grad_norm": 0.38713720440864563, | |
| "learning_rate": 3.630361944715024e-05, | |
| "loss": 0.0162, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.383716711414266, | |
| "grad_norm": 0.6612209677696228, | |
| "learning_rate": 3.626079168480601e-05, | |
| "loss": 0.0147, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.3842972208112619, | |
| "grad_norm": 1.3527421951293945, | |
| "learning_rate": 3.621792242306111e-05, | |
| "loss": 0.0168, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.3848777302082578, | |
| "grad_norm": 3.078646421432495, | |
| "learning_rate": 3.617501181990164e-05, | |
| "loss": 0.015, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.3854582396052536, | |
| "grad_norm": 2.9273505210876465, | |
| "learning_rate": 3.613206003346606e-05, | |
| "loss": 0.0182, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.3860387490022495, | |
| "grad_norm": 0.700567364692688, | |
| "learning_rate": 3.608906722204463e-05, | |
| "loss": 0.0138, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.3866192583992453, | |
| "grad_norm": 0.4075513482093811, | |
| "learning_rate": 3.6046033544078736e-05, | |
| "loss": 0.0151, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.3871997677962412, | |
| "grad_norm": 1.424938678741455, | |
| "learning_rate": 3.6002959158160454e-05, | |
| "loss": 0.0141, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.38778027719323704, | |
| "grad_norm": 1.095062255859375, | |
| "learning_rate": 3.595984422303182e-05, | |
| "loss": 0.0177, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.38836078659023293, | |
| "grad_norm": 1.1501364707946777, | |
| "learning_rate": 3.591668889758432e-05, | |
| "loss": 0.0128, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.3889412959872288, | |
| "grad_norm": 0.35930997133255005, | |
| "learning_rate": 3.587349334085831e-05, | |
| "loss": 0.0163, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.38952180538422465, | |
| "grad_norm": 0.22883236408233643, | |
| "learning_rate": 3.5830257712042374e-05, | |
| "loss": 0.0144, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.39010231478122054, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.578698217047281e-05, | |
| "loss": 0.0146, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.39068282417821637, | |
| "grad_norm": 0.4836377501487732, | |
| "learning_rate": 3.574366687563298e-05, | |
| "loss": 0.0155, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.39126333357521226, | |
| "grad_norm": 1.9902615547180176, | |
| "learning_rate": 3.570031198715277e-05, | |
| "loss": 0.0189, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.3918438429722081, | |
| "grad_norm": 0.6981222629547119, | |
| "learning_rate": 3.565691766480795e-05, | |
| "loss": 0.0167, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.392424352369204, | |
| "grad_norm": 1.2047152519226074, | |
| "learning_rate": 3.561348406851966e-05, | |
| "loss": 0.0158, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.39300486176619986, | |
| "grad_norm": 1.062116026878357, | |
| "learning_rate": 3.557001135835375e-05, | |
| "loss": 0.0156, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.3935853711631957, | |
| "grad_norm": 3.757115602493286, | |
| "learning_rate": 3.55264996945202e-05, | |
| "loss": 0.0157, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.3941658805601916, | |
| "grad_norm": 2.9534924030303955, | |
| "learning_rate": 3.548294923737258e-05, | |
| "loss": 0.0157, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.3947463899571874, | |
| "grad_norm": 0.46122029423713684, | |
| "learning_rate": 3.5439360147407404e-05, | |
| "loss": 0.016, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.3953268993541833, | |
| "grad_norm": 2.8722681999206543, | |
| "learning_rate": 3.5395732585263566e-05, | |
| "loss": 0.0144, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.39590740875117914, | |
| "grad_norm": 0.988606870174408, | |
| "learning_rate": 3.535206671172175e-05, | |
| "loss": 0.014, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.396487918148175, | |
| "grad_norm": 0.39610621333122253, | |
| "learning_rate": 3.530836268770379e-05, | |
| "loss": 0.0141, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.3970684275451709, | |
| "grad_norm": 3.2667903900146484, | |
| "learning_rate": 3.526462067427218e-05, | |
| "loss": 0.0212, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.39764893694216674, | |
| "grad_norm": 0.9565812945365906, | |
| "learning_rate": 3.522084083262935e-05, | |
| "loss": 0.0145, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.39822944633916263, | |
| "grad_norm": 1.0002511739730835, | |
| "learning_rate": 3.5177023324117206e-05, | |
| "loss": 0.0158, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.39880995573615846, | |
| "grad_norm": 0.8633850812911987, | |
| "learning_rate": 3.51331683102164e-05, | |
| "loss": 0.0192, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.39939046513315435, | |
| "grad_norm": 0.359651654958725, | |
| "learning_rate": 3.508927595254585e-05, | |
| "loss": 0.0198, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.3999709745301502, | |
| "grad_norm": 1.0570274591445923, | |
| "learning_rate": 3.504534641286209e-05, | |
| "loss": 0.0163, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.40055148392714607, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.500137985305865e-05, | |
| "loss": 0.0141, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.40113199332414196, | |
| "grad_norm": 1.52181077003479, | |
| "learning_rate": 3.495737643516552e-05, | |
| "loss": 0.0145, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.4017125027211378, | |
| "grad_norm": 0.6070308685302734, | |
| "learning_rate": 3.491333632134852e-05, | |
| "loss": 0.0179, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.4022930121181337, | |
| "grad_norm": 0.19646623730659485, | |
| "learning_rate": 3.486925967390871e-05, | |
| "loss": 0.0139, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.4028735215151295, | |
| "grad_norm": 0.0677868127822876, | |
| "learning_rate": 3.482514665528176e-05, | |
| "loss": 0.0186, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.4034540309121254, | |
| "grad_norm": 9.331048965454102, | |
| "learning_rate": 3.4780997428037424e-05, | |
| "loss": 0.0139, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.4040345403091212, | |
| "grad_norm": 1.8702892065048218, | |
| "learning_rate": 3.473681215487884e-05, | |
| "loss": 0.0162, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.4046150497061171, | |
| "grad_norm": 0.36429017782211304, | |
| "learning_rate": 3.4692590998642026e-05, | |
| "loss": 0.0164, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.405195559103113, | |
| "grad_norm": 2.0621962547302246, | |
| "learning_rate": 3.464833412229523e-05, | |
| "loss": 0.0125, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.40577606850010883, | |
| "grad_norm": 0.6523299217224121, | |
| "learning_rate": 3.460404168893834e-05, | |
| "loss": 0.0171, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.4063565778971047, | |
| "grad_norm": 0.20909562706947327, | |
| "learning_rate": 3.455971386180229e-05, | |
| "loss": 0.0179, | |
| "step": 70000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 172263, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1534812258533112e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |