Text Generation
Safetensors
English
Chinese
qwen3
process-reward-model
prm
reasoning
reinforcement-learning
grpo
qwen
conversational
Instructions to use skylenage-ai/GPRM-4B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Inference
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 4971, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.006035913686434284, | |
| "grad_norm": 13.465597639885102, | |
| "learning_rate": 1.8072289156626505e-07, | |
| "loss": 0.8887, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012071827372868568, | |
| "grad_norm": 12.414937705631793, | |
| "learning_rate": 3.8152610441767073e-07, | |
| "loss": 0.8871, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01810774105930285, | |
| "grad_norm": 5.604563580675332, | |
| "learning_rate": 5.823293172690764e-07, | |
| "loss": 0.7943, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.024143654745737136, | |
| "grad_norm": 3.082127771205323, | |
| "learning_rate": 7.83132530120482e-07, | |
| "loss": 0.6989, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03017956843217142, | |
| "grad_norm": 1.9253363533227204, | |
| "learning_rate": 9.839357429718876e-07, | |
| "loss": 0.6283, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0362154821186057, | |
| "grad_norm": 1.2352792533570607, | |
| "learning_rate": 1.1847389558232934e-06, | |
| "loss": 0.5916, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.04225139580503999, | |
| "grad_norm": 0.8094703225757798, | |
| "learning_rate": 1.385542168674699e-06, | |
| "loss": 0.5623, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04828730949147427, | |
| "grad_norm": 0.7924082712954621, | |
| "learning_rate": 1.5863453815261046e-06, | |
| "loss": 0.536, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.05432322317790855, | |
| "grad_norm": 0.7765422489934142, | |
| "learning_rate": 1.7871485943775102e-06, | |
| "loss": 0.5246, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.06035913686434284, | |
| "grad_norm": 1.2024335532490196, | |
| "learning_rate": 1.987951807228916e-06, | |
| "loss": 0.5112, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.06639505055077713, | |
| "grad_norm": 0.7361271470838762, | |
| "learning_rate": 2.1887550200803216e-06, | |
| "loss": 0.4973, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0724309642372114, | |
| "grad_norm": 0.9636947285799289, | |
| "learning_rate": 2.389558232931727e-06, | |
| "loss": 0.4926, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07846687792364569, | |
| "grad_norm": 0.834221678860187, | |
| "learning_rate": 2.590361445783133e-06, | |
| "loss": 0.4868, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.08450279161007998, | |
| "grad_norm": 0.721459089158014, | |
| "learning_rate": 2.791164658634538e-06, | |
| "loss": 0.4836, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.09053870529651425, | |
| "grad_norm": 0.7680662169711512, | |
| "learning_rate": 2.991967871485944e-06, | |
| "loss": 0.4759, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.09657461898294854, | |
| "grad_norm": 0.8074470103289187, | |
| "learning_rate": 3.1927710843373494e-06, | |
| "loss": 0.4753, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.10261053266938283, | |
| "grad_norm": 0.7821569953929599, | |
| "learning_rate": 3.393574297188755e-06, | |
| "loss": 0.4689, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1086464463558171, | |
| "grad_norm": 0.8046561770221946, | |
| "learning_rate": 3.5943775100401606e-06, | |
| "loss": 0.4678, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1146823600422514, | |
| "grad_norm": 0.8787311236716008, | |
| "learning_rate": 3.7951807228915664e-06, | |
| "loss": 0.463, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.12071827372868568, | |
| "grad_norm": 0.8810490177348705, | |
| "learning_rate": 3.995983935742972e-06, | |
| "loss": 0.4601, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.12675418741511996, | |
| "grad_norm": 0.8889957032229883, | |
| "learning_rate": 4.196787148594378e-06, | |
| "loss": 0.4589, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.13279010110155426, | |
| "grad_norm": 1.0047774511651133, | |
| "learning_rate": 4.397590361445783e-06, | |
| "loss": 0.4533, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.13882601478798853, | |
| "grad_norm": 0.8382898002966074, | |
| "learning_rate": 4.598393574297189e-06, | |
| "loss": 0.4541, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1448619284744228, | |
| "grad_norm": 0.8867952107395614, | |
| "learning_rate": 4.799196787148594e-06, | |
| "loss": 0.4488, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1508978421608571, | |
| "grad_norm": 0.8306941422038123, | |
| "learning_rate": 5e-06, | |
| "loss": 0.4521, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.15693375584729138, | |
| "grad_norm": 0.8874942497893604, | |
| "learning_rate": 5.200803212851407e-06, | |
| "loss": 0.4505, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.16296966953372566, | |
| "grad_norm": 0.8911658824764517, | |
| "learning_rate": 5.401606425702812e-06, | |
| "loss": 0.446, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.16900558322015996, | |
| "grad_norm": 0.8852855388263275, | |
| "learning_rate": 5.602409638554217e-06, | |
| "loss": 0.4452, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.17504149690659423, | |
| "grad_norm": 0.9910318655855725, | |
| "learning_rate": 5.803212851405623e-06, | |
| "loss": 0.4413, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1810774105930285, | |
| "grad_norm": 1.0110708733608424, | |
| "learning_rate": 6.004016064257029e-06, | |
| "loss": 0.4397, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1871133242794628, | |
| "grad_norm": 0.9003864963841174, | |
| "learning_rate": 6.2048192771084344e-06, | |
| "loss": 0.4414, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.19314923796589709, | |
| "grad_norm": 0.906399226331659, | |
| "learning_rate": 6.40562248995984e-06, | |
| "loss": 0.4372, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.19918515165233136, | |
| "grad_norm": 1.071695971731785, | |
| "learning_rate": 6.606425702811245e-06, | |
| "loss": 0.4381, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.20522106533876566, | |
| "grad_norm": 0.9761062070856111, | |
| "learning_rate": 6.8072289156626514e-06, | |
| "loss": 0.4343, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.21125697902519994, | |
| "grad_norm": 0.9721838815300707, | |
| "learning_rate": 7.008032128514058e-06, | |
| "loss": 0.4373, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2172928927116342, | |
| "grad_norm": 0.8909414468062403, | |
| "learning_rate": 7.208835341365462e-06, | |
| "loss": 0.4336, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2233288063980685, | |
| "grad_norm": 0.964718436271309, | |
| "learning_rate": 7.4096385542168684e-06, | |
| "loss": 0.4348, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2293647200845028, | |
| "grad_norm": 1.025409695885071, | |
| "learning_rate": 7.610441767068274e-06, | |
| "loss": 0.4326, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.23540063377093706, | |
| "grad_norm": 0.9270275542948012, | |
| "learning_rate": 7.81124497991968e-06, | |
| "loss": 0.4324, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.24143654745737136, | |
| "grad_norm": 0.9390965093376761, | |
| "learning_rate": 8.012048192771085e-06, | |
| "loss": 0.4302, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.24747246114380564, | |
| "grad_norm": 0.7707812298350031, | |
| "learning_rate": 8.21285140562249e-06, | |
| "loss": 0.4276, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2535083748302399, | |
| "grad_norm": 0.8215921682895242, | |
| "learning_rate": 8.413654618473896e-06, | |
| "loss": 0.4274, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2595442885166742, | |
| "grad_norm": 1.0290878620245738, | |
| "learning_rate": 8.614457831325302e-06, | |
| "loss": 0.427, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2655802022031085, | |
| "grad_norm": 0.9009095092288704, | |
| "learning_rate": 8.815261044176707e-06, | |
| "loss": 0.4232, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.27161611588954276, | |
| "grad_norm": 0.9646916353387767, | |
| "learning_rate": 9.016064257028112e-06, | |
| "loss": 0.4235, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.27765202957597707, | |
| "grad_norm": 0.8009669905789347, | |
| "learning_rate": 9.21686746987952e-06, | |
| "loss": 0.4248, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "grad_norm": 2.57971922495045, | |
| "learning_rate": 9.417670682730925e-06, | |
| "loss": 0.4246, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2897238569488456, | |
| "grad_norm": 0.9225235875464007, | |
| "learning_rate": 9.61847389558233e-06, | |
| "loss": 0.4256, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2957597706352799, | |
| "grad_norm": 0.8937790567235143, | |
| "learning_rate": 9.819277108433736e-06, | |
| "loss": 0.4232, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3017956843217142, | |
| "grad_norm": 0.992661961364272, | |
| "learning_rate": 9.99999876677608e-06, | |
| "loss": 0.4236, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.30783159800814847, | |
| "grad_norm": 1.1203639087859305, | |
| "learning_rate": 9.999850780641762e-06, | |
| "loss": 0.423, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.31386751169458277, | |
| "grad_norm": 0.9504675074156581, | |
| "learning_rate": 9.999456158087994e-06, | |
| "loss": 0.4255, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.31990342538101707, | |
| "grad_norm": 1.1526705071263037, | |
| "learning_rate": 9.998814918581017e-06, | |
| "loss": 0.4236, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3259393390674513, | |
| "grad_norm": 0.9400926329756719, | |
| "learning_rate": 9.99792709375238e-06, | |
| "loss": 0.4193, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3319752527538856, | |
| "grad_norm": 0.761979605644821, | |
| "learning_rate": 9.996792727397374e-06, | |
| "loss": 0.4178, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.3380111664403199, | |
| "grad_norm": 0.7761858463434534, | |
| "learning_rate": 9.995411875472882e-06, | |
| "loss": 0.4172, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.34404708012675417, | |
| "grad_norm": 0.8353265789234773, | |
| "learning_rate": 9.993784606094612e-06, | |
| "loss": 0.417, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.35008299381318847, | |
| "grad_norm": 0.7921534241896437, | |
| "learning_rate": 9.991910999533739e-06, | |
| "loss": 0.4164, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.35611890749962277, | |
| "grad_norm": 0.8368518529458858, | |
| "learning_rate": 9.98979114821294e-06, | |
| "loss": 0.4212, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.362154821186057, | |
| "grad_norm": 0.8526689259731893, | |
| "learning_rate": 9.98742515670185e-06, | |
| "loss": 0.413, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.3681907348724913, | |
| "grad_norm": 0.8691355689423315, | |
| "learning_rate": 9.98481314171188e-06, | |
| "loss": 0.4147, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.3742266485589256, | |
| "grad_norm": 0.7413766525933784, | |
| "learning_rate": 9.981955232090484e-06, | |
| "loss": 0.4202, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.38026256224535987, | |
| "grad_norm": 0.862826800304683, | |
| "learning_rate": 9.978851568814789e-06, | |
| "loss": 0.4144, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.38629847593179417, | |
| "grad_norm": 0.852995884285724, | |
| "learning_rate": 9.975502304984643e-06, | |
| "loss": 0.4159, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.3923343896182285, | |
| "grad_norm": 0.8190268708459463, | |
| "learning_rate": 9.971907605815065e-06, | |
| "loss": 0.4133, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.3983703033046627, | |
| "grad_norm": 0.7826738241592833, | |
| "learning_rate": 9.968067648628092e-06, | |
| "loss": 0.417, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.404406216991097, | |
| "grad_norm": 0.8234056482304477, | |
| "learning_rate": 9.963982622844037e-06, | |
| "loss": 0.4151, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4104421306775313, | |
| "grad_norm": 0.8389822495874198, | |
| "learning_rate": 9.959652729972138e-06, | |
| "loss": 0.4142, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.41647804436396557, | |
| "grad_norm": 0.7530220222404655, | |
| "learning_rate": 9.955078183600626e-06, | |
| "loss": 0.4135, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.4225139580503999, | |
| "grad_norm": 0.8094044727188283, | |
| "learning_rate": 9.950259209386182e-06, | |
| "loss": 0.4076, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4285498717368342, | |
| "grad_norm": 0.7704390882655109, | |
| "learning_rate": 9.945196045042812e-06, | |
| "loss": 0.41, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.4345857854232684, | |
| "grad_norm": 0.9003987196323937, | |
| "learning_rate": 9.93988894033011e-06, | |
| "loss": 0.4114, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.4406216991097027, | |
| "grad_norm": 0.8729571471009108, | |
| "learning_rate": 9.934338157040953e-06, | |
| "loss": 0.4128, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.446657612796137, | |
| "grad_norm": 0.7801434856688376, | |
| "learning_rate": 9.928543968988576e-06, | |
| "loss": 0.4103, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4526935264825713, | |
| "grad_norm": 0.9417689284475159, | |
| "learning_rate": 9.922506661993067e-06, | |
| "loss": 0.4086, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.4587294401690056, | |
| "grad_norm": 0.8877368018323296, | |
| "learning_rate": 9.91622653386727e-06, | |
| "loss": 0.4139, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.4647653538554399, | |
| "grad_norm": 0.7960343939884429, | |
| "learning_rate": 9.909703894402093e-06, | |
| "loss": 0.4072, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4708012675418741, | |
| "grad_norm": 0.7142525800658928, | |
| "learning_rate": 9.90293906535123e-06, | |
| "loss": 0.4069, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4768371812283084, | |
| "grad_norm": 0.8168998091378754, | |
| "learning_rate": 9.895932380415277e-06, | |
| "loss": 0.4053, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.48287309491474273, | |
| "grad_norm": 0.7851582099155968, | |
| "learning_rate": 9.888684185225291e-06, | |
| "loss": 0.4096, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.48890900860117703, | |
| "grad_norm": 0.7313895363802666, | |
| "learning_rate": 9.881194837325722e-06, | |
| "loss": 0.4035, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.4949449222876113, | |
| "grad_norm": 0.801599057157289, | |
| "learning_rate": 9.873464706156785e-06, | |
| "loss": 0.4082, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5009808359740455, | |
| "grad_norm": 0.7959824627607599, | |
| "learning_rate": 9.865494173036238e-06, | |
| "loss": 0.4086, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5070167496604798, | |
| "grad_norm": 0.7643194639900054, | |
| "learning_rate": 9.857283631140563e-06, | |
| "loss": 0.4097, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5130526633469141, | |
| "grad_norm": 0.8141162481887632, | |
| "learning_rate": 9.848833485485577e-06, | |
| "loss": 0.4068, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5190885770333484, | |
| "grad_norm": 0.7263606575446551, | |
| "learning_rate": 9.840144152906455e-06, | |
| "loss": 0.4052, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5251244907197827, | |
| "grad_norm": 0.7326820835121685, | |
| "learning_rate": 9.831216062037163e-06, | |
| "loss": 0.403, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.531160404406217, | |
| "grad_norm": 0.7722145618849807, | |
| "learning_rate": 9.822049653289318e-06, | |
| "loss": 0.4041, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5371963180926512, | |
| "grad_norm": 0.7035970302521439, | |
| "learning_rate": 9.81264537883046e-06, | |
| "loss": 0.401, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5432322317790855, | |
| "grad_norm": 0.6580207236042055, | |
| "learning_rate": 9.803003702561753e-06, | |
| "loss": 0.4057, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5492681454655198, | |
| "grad_norm": 0.6960070468306416, | |
| "learning_rate": 9.79312510009509e-06, | |
| "loss": 0.4103, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5553040591519541, | |
| "grad_norm": 0.7088936549744779, | |
| "learning_rate": 9.783010058729644e-06, | |
| "loss": 0.4024, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5613399728383884, | |
| "grad_norm": 0.8173990374915286, | |
| "learning_rate": 9.772659077427824e-06, | |
| "loss": 0.3983, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "grad_norm": 0.7248588219467303, | |
| "learning_rate": 9.762072666790658e-06, | |
| "loss": 0.4042, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5734118002112569, | |
| "grad_norm": 0.6953286894486166, | |
| "learning_rate": 9.751251349032615e-06, | |
| "loss": 0.4052, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5794477138976912, | |
| "grad_norm": 0.6805775618542874, | |
| "learning_rate": 9.74019565795584e-06, | |
| "loss": 0.4028, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5854836275841255, | |
| "grad_norm": 0.7073250522342893, | |
| "learning_rate": 9.728906138923823e-06, | |
| "loss": 0.4031, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5915195412705598, | |
| "grad_norm": 0.8161486510568995, | |
| "learning_rate": 9.71738334883449e-06, | |
| "loss": 0.4012, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5975554549569941, | |
| "grad_norm": 0.7478470587664012, | |
| "learning_rate": 9.705627856092743e-06, | |
| "loss": 0.4035, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6035913686434284, | |
| "grad_norm": 1.2181648223419725, | |
| "learning_rate": 9.69364024058242e-06, | |
| "loss": 0.3994, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6096272823298626, | |
| "grad_norm": 0.724496170506016, | |
| "learning_rate": 9.681421093637677e-06, | |
| "loss": 0.4003, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.6156631960162969, | |
| "grad_norm": 0.7245373569956688, | |
| "learning_rate": 9.668971018013835e-06, | |
| "loss": 0.3993, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.6216991097027312, | |
| "grad_norm": 1.3707555561464966, | |
| "learning_rate": 9.656290627857638e-06, | |
| "loss": 0.4031, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6277350233891655, | |
| "grad_norm": 0.8617205371794142, | |
| "learning_rate": 9.643380548676957e-06, | |
| "loss": 0.3989, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6337709370755998, | |
| "grad_norm": 0.7218421707442351, | |
| "learning_rate": 9.63024141730994e-06, | |
| "loss": 0.4009, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6398068507620341, | |
| "grad_norm": 0.7919863849580143, | |
| "learning_rate": 9.616873881893593e-06, | |
| "loss": 0.402, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6458427644484683, | |
| "grad_norm": 0.7643496416415103, | |
| "learning_rate": 9.603278601831806e-06, | |
| "loss": 0.3966, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6518786781349026, | |
| "grad_norm": 0.8387350986976135, | |
| "learning_rate": 9.58945624776284e-06, | |
| "loss": 0.3974, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6579145918213369, | |
| "grad_norm": 0.7195707742464319, | |
| "learning_rate": 9.575407501526218e-06, | |
| "loss": 0.4033, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6639505055077712, | |
| "grad_norm": 0.8948583587192116, | |
| "learning_rate": 9.561133056129122e-06, | |
| "loss": 0.4005, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6699864191942055, | |
| "grad_norm": 0.7784558611785358, | |
| "learning_rate": 9.546633615712184e-06, | |
| "loss": 0.3969, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6760223328806398, | |
| "grad_norm": 0.7279188084081983, | |
| "learning_rate": 9.531909895514766e-06, | |
| "loss": 0.3968, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6820582465670741, | |
| "grad_norm": 0.7707824454002812, | |
| "learning_rate": 9.516962621839667e-06, | |
| "loss": 0.3941, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6880941602535083, | |
| "grad_norm": 0.7559246242676043, | |
| "learning_rate": 9.501792532017304e-06, | |
| "loss": 0.3935, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6941300739399426, | |
| "grad_norm": 0.7670492895949397, | |
| "learning_rate": 9.48640037436934e-06, | |
| "loss": 0.3962, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7001659876263769, | |
| "grad_norm": 0.7574175499302432, | |
| "learning_rate": 9.470786908171761e-06, | |
| "loss": 0.396, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.7062019013128112, | |
| "grad_norm": 1.1364368407573255, | |
| "learning_rate": 9.454952903617434e-06, | |
| "loss": 0.3987, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.7122378149992455, | |
| "grad_norm": 0.6929517509246322, | |
| "learning_rate": 9.438899141778105e-06, | |
| "loss": 0.3959, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.7182737286856798, | |
| "grad_norm": 0.7239918001848392, | |
| "learning_rate": 9.42262641456588e-06, | |
| "loss": 0.3961, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.724309642372114, | |
| "grad_norm": 0.7351627240649914, | |
| "learning_rate": 9.406135524694146e-06, | |
| "loss": 0.3946, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7303455560585483, | |
| "grad_norm": 0.7178193311197739, | |
| "learning_rate": 9.389427285637986e-06, | |
| "loss": 0.3934, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7363814697449826, | |
| "grad_norm": 0.7197436378060236, | |
| "learning_rate": 9.372502521594052e-06, | |
| "loss": 0.3951, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7424173834314169, | |
| "grad_norm": 0.7020942866993558, | |
| "learning_rate": 9.355362067439899e-06, | |
| "loss": 0.3953, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.7484532971178512, | |
| "grad_norm": 0.6493652144119091, | |
| "learning_rate": 9.338006768692807e-06, | |
| "loss": 0.3976, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7544892108042855, | |
| "grad_norm": 0.7452091082245685, | |
| "learning_rate": 9.320437481468077e-06, | |
| "loss": 0.3947, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7605251244907197, | |
| "grad_norm": 0.7211982596336295, | |
| "learning_rate": 9.302655072436789e-06, | |
| "loss": 0.3978, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.766561038177154, | |
| "grad_norm": 0.8069527677411222, | |
| "learning_rate": 9.284660418783064e-06, | |
| "loss": 0.3961, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.7725969518635883, | |
| "grad_norm": 0.6964974366663241, | |
| "learning_rate": 9.266454408160779e-06, | |
| "loss": 0.395, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7786328655500226, | |
| "grad_norm": 0.6951835215600591, | |
| "learning_rate": 9.248037938649792e-06, | |
| "loss": 0.3918, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.784668779236457, | |
| "grad_norm": 0.7011033108204148, | |
| "learning_rate": 9.229411918711637e-06, | |
| "loss": 0.3911, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.7907046929228913, | |
| "grad_norm": 0.6699999752789259, | |
| "learning_rate": 9.210577267144703e-06, | |
| "loss": 0.3917, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.7967406066093254, | |
| "grad_norm": 0.7952469588442095, | |
| "learning_rate": 9.191534913038926e-06, | |
| "loss": 0.393, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.8027765202957597, | |
| "grad_norm": 0.7362949625214187, | |
| "learning_rate": 9.172285795729945e-06, | |
| "loss": 0.3916, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.808812433982194, | |
| "grad_norm": 0.777349182077021, | |
| "learning_rate": 9.152830864752773e-06, | |
| "loss": 0.396, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.8148483476686283, | |
| "grad_norm": 0.6858011231159463, | |
| "learning_rate": 9.133171079794952e-06, | |
| "loss": 0.3949, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.8208842613550626, | |
| "grad_norm": 0.8252893789848457, | |
| "learning_rate": 9.113307410649222e-06, | |
| "loss": 0.3951, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.826920175041497, | |
| "grad_norm": 0.742614174317752, | |
| "learning_rate": 9.093240837165668e-06, | |
| "loss": 0.3912, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.8329560887279311, | |
| "grad_norm": 0.6712408370389595, | |
| "learning_rate": 9.072972349203401e-06, | |
| "loss": 0.3938, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.8389920024143654, | |
| "grad_norm": 0.7390425813359819, | |
| "learning_rate": 9.052502946581718e-06, | |
| "loss": 0.3902, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.8450279161007997, | |
| "grad_norm": 0.9031901060003036, | |
| "learning_rate": 9.031833639030789e-06, | |
| "loss": 0.39, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 0.8073830235615219, | |
| "learning_rate": 9.010965446141842e-06, | |
| "loss": 0.3907, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.8570997434736684, | |
| "grad_norm": 0.7197468777451328, | |
| "learning_rate": 8.989899397316875e-06, | |
| "loss": 0.3933, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.8631356571601027, | |
| "grad_norm": 0.7874409375571629, | |
| "learning_rate": 8.96863653171787e-06, | |
| "loss": 0.3941, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.8691715708465368, | |
| "grad_norm": 0.7047790860975574, | |
| "learning_rate": 8.947177898215538e-06, | |
| "loss": 0.3918, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.8752074845329711, | |
| "grad_norm": 0.6732410856766448, | |
| "learning_rate": 8.925524555337575e-06, | |
| "loss": 0.3948, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.8812433982194054, | |
| "grad_norm": 0.6379130166882847, | |
| "learning_rate": 8.90367757121645e-06, | |
| "loss": 0.392, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.8872793119058398, | |
| "grad_norm": 0.6453169279070088, | |
| "learning_rate": 8.881638023536715e-06, | |
| "loss": 0.3902, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.893315225592274, | |
| "grad_norm": 0.8925532684482897, | |
| "learning_rate": 8.859406999481839e-06, | |
| "loss": 0.3897, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.8993511392787084, | |
| "grad_norm": 0.7321151042406583, | |
| "learning_rate": 8.836985595680585e-06, | |
| "loss": 0.3903, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.9053870529651425, | |
| "grad_norm": 0.717542202485072, | |
| "learning_rate": 8.81437491815291e-06, | |
| "loss": 0.3907, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.9114229666515768, | |
| "grad_norm": 0.6899069830042462, | |
| "learning_rate": 8.791576082255414e-06, | |
| "loss": 0.3914, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.9174588803380112, | |
| "grad_norm": 0.7416902913208727, | |
| "learning_rate": 8.768590212626305e-06, | |
| "loss": 0.3914, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.9234947940244455, | |
| "grad_norm": 0.648187852127454, | |
| "learning_rate": 8.745418443129944e-06, | |
| "loss": 0.3878, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.9295307077108798, | |
| "grad_norm": 0.6971446829374528, | |
| "learning_rate": 8.722061916800892e-06, | |
| "loss": 0.3889, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.9355666213973141, | |
| "grad_norm": 0.6897656341763103, | |
| "learning_rate": 8.698521785787543e-06, | |
| "loss": 0.3916, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.9416025350837482, | |
| "grad_norm": 0.6707821534631215, | |
| "learning_rate": 8.674799211295272e-06, | |
| "loss": 0.3872, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.9476384487701826, | |
| "grad_norm": 0.7047440310341709, | |
| "learning_rate": 8.650895363529172e-06, | |
| "loss": 0.3893, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.9536743624566169, | |
| "grad_norm": 0.7111300925227007, | |
| "learning_rate": 8.626811421636318e-06, | |
| "loss": 0.3899, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.9597102761430512, | |
| "grad_norm": 0.742242466940292, | |
| "learning_rate": 8.602548573647603e-06, | |
| "loss": 0.3933, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.9657461898294855, | |
| "grad_norm": 0.6405514647772552, | |
| "learning_rate": 8.578108016419138e-06, | |
| "loss": 0.3886, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.9717821035159198, | |
| "grad_norm": 0.6969067995610034, | |
| "learning_rate": 8.553490955573207e-06, | |
| "loss": 0.3875, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.9778180172023541, | |
| "grad_norm": 0.6404080311189763, | |
| "learning_rate": 8.528698605438801e-06, | |
| "loss": 0.3915, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.9838539308887883, | |
| "grad_norm": 0.689314089106684, | |
| "learning_rate": 8.50373218899171e-06, | |
| "loss": 0.3897, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.9898898445752226, | |
| "grad_norm": 0.6238451440610306, | |
| "learning_rate": 8.478592937794202e-06, | |
| "loss": 0.3865, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.9959257582616569, | |
| "grad_norm": 0.6246538104726604, | |
| "learning_rate": 8.453282091934262e-06, | |
| "loss": 0.3891, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.0018107741059303, | |
| "grad_norm": 0.6650133535244673, | |
| "learning_rate": 8.427800899964438e-06, | |
| "loss": 0.3775, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.0078466877923646, | |
| "grad_norm": 0.7340465665361768, | |
| "learning_rate": 8.402150618840229e-06, | |
| "loss": 0.3658, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.013882601478799, | |
| "grad_norm": 0.8803678131362109, | |
| "learning_rate": 8.376332513858091e-06, | |
| "loss": 0.3643, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.0199185151652332, | |
| "grad_norm": 0.6784266807756097, | |
| "learning_rate": 8.350347858593035e-06, | |
| "loss": 0.3632, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.0259544288516673, | |
| "grad_norm": 0.6757297253946429, | |
| "learning_rate": 8.324197934835775e-06, | |
| "loss": 0.3611, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.0319903425381016, | |
| "grad_norm": 0.6937615226816463, | |
| "learning_rate": 8.297884032529525e-06, | |
| "loss": 0.3641, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.038026256224536, | |
| "grad_norm": 0.6656265896882699, | |
| "learning_rate": 8.271407449706347e-06, | |
| "loss": 0.3634, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.0440621699109702, | |
| "grad_norm": 0.6758693000716391, | |
| "learning_rate": 8.244769492423144e-06, | |
| "loss": 0.3651, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.0500980835974045, | |
| "grad_norm": 0.7271602756269683, | |
| "learning_rate": 8.217971474697205e-06, | |
| "loss": 0.3655, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.0561339972838388, | |
| "grad_norm": 0.7262048623607191, | |
| "learning_rate": 8.191014718441413e-06, | |
| "loss": 0.3646, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.0621699109702731, | |
| "grad_norm": 0.7594858496478063, | |
| "learning_rate": 8.163900553399022e-06, | |
| "loss": 0.3683, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.0682058246567074, | |
| "grad_norm": 0.6834326812737692, | |
| "learning_rate": 8.13663031707806e-06, | |
| "loss": 0.3657, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.0742417383431417, | |
| "grad_norm": 0.829231127715137, | |
| "learning_rate": 8.109205354685367e-06, | |
| "loss": 0.3657, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.080277652029576, | |
| "grad_norm": 0.7172584884654448, | |
| "learning_rate": 8.081627019060223e-06, | |
| "loss": 0.3612, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.0863135657160103, | |
| "grad_norm": 0.700123283944604, | |
| "learning_rate": 8.053896670607616e-06, | |
| "loss": 0.3669, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.0923494794024446, | |
| "grad_norm": 0.6802763184360072, | |
| "learning_rate": 8.026015677231137e-06, | |
| "loss": 0.36, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.0983853930888787, | |
| "grad_norm": 0.6976972839342949, | |
| "learning_rate": 7.997985414265513e-06, | |
| "loss": 0.3645, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.104421306775313, | |
| "grad_norm": 0.6892045690564895, | |
| "learning_rate": 7.969807264408745e-06, | |
| "loss": 0.3664, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.1104572204617473, | |
| "grad_norm": 0.6606374628961976, | |
| "learning_rate": 7.94148261765391e-06, | |
| "loss": 0.3611, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.1164931341481816, | |
| "grad_norm": 0.7063672325182395, | |
| "learning_rate": 7.913012871220605e-06, | |
| "loss": 0.3652, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.122529047834616, | |
| "grad_norm": 0.6353061774622171, | |
| "learning_rate": 7.884399429486e-06, | |
| "loss": 0.3619, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.1285649615210502, | |
| "grad_norm": 0.6646621743965846, | |
| "learning_rate": 7.855643703915585e-06, | |
| "loss": 0.3638, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.1346008752074845, | |
| "grad_norm": 0.6379034557335701, | |
| "learning_rate": 7.826747112993532e-06, | |
| "loss": 0.3595, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.1406367888939188, | |
| "grad_norm": 0.6995974469144366, | |
| "learning_rate": 7.797711082152726e-06, | |
| "loss": 0.3628, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.1466727025803531, | |
| "grad_norm": 0.6564170955860726, | |
| "learning_rate": 7.768537043704447e-06, | |
| "loss": 0.3637, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.1527086162667874, | |
| "grad_norm": 0.7572552114374352, | |
| "learning_rate": 7.739226436767721e-06, | |
| "loss": 0.362, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.1587445299532217, | |
| "grad_norm": 0.7571612085211564, | |
| "learning_rate": 7.709780707198328e-06, | |
| "loss": 0.3638, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.164780443639656, | |
| "grad_norm": 0.6792493024466744, | |
| "learning_rate": 7.680201307517479e-06, | |
| "loss": 0.3625, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.1708163573260904, | |
| "grad_norm": 0.664259682779261, | |
| "learning_rate": 7.650489696840164e-06, | |
| "loss": 0.3646, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.1768522710125244, | |
| "grad_norm": 0.6270149603322056, | |
| "learning_rate": 7.6206473408031775e-06, | |
| "loss": 0.3624, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.1828881846989587, | |
| "grad_norm": 0.6383894085325998, | |
| "learning_rate": 7.590675711492823e-06, | |
| "loss": 0.3643, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.188924098385393, | |
| "grad_norm": 0.6816453891866903, | |
| "learning_rate": 7.56057628737229e-06, | |
| "loss": 0.3637, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 1.1949600120718273, | |
| "grad_norm": 0.7133078108250313, | |
| "learning_rate": 7.530350553208726e-06, | |
| "loss": 0.3585, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.2009959257582616, | |
| "grad_norm": 0.6322767475179056, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.361, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 1.207031839444696, | |
| "grad_norm": 0.6987380190815154, | |
| "learning_rate": 7.469526124901149e-06, | |
| "loss": 0.3623, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.2130677531311302, | |
| "grad_norm": 0.6219916214226197, | |
| "learning_rate": 7.4389304311505195e-06, | |
| "loss": 0.3637, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 1.2191036668175645, | |
| "grad_norm": 0.6591583924033313, | |
| "learning_rate": 7.408214427995628e-06, | |
| "loss": 0.3644, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.2251395805039988, | |
| "grad_norm": 0.7005471225701302, | |
| "learning_rate": 7.3773796306187e-06, | |
| "loss": 0.3595, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 1.2311754941904332, | |
| "grad_norm": 0.6332845796820719, | |
| "learning_rate": 7.346427560061931e-06, | |
| "loss": 0.3652, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.2372114078768675, | |
| "grad_norm": 0.6778224076333697, | |
| "learning_rate": 7.315359743152464e-06, | |
| "loss": 0.3606, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.2432473215633015, | |
| "grad_norm": 0.6582665893949518, | |
| "learning_rate": 7.284177712427056e-06, | |
| "loss": 0.3599, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.2492832352497358, | |
| "grad_norm": 0.6584059931101761, | |
| "learning_rate": 7.252883006056495e-06, | |
| "loss": 0.3622, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 1.2553191489361701, | |
| "grad_norm": 0.6857700496450303, | |
| "learning_rate": 7.221477167769716e-06, | |
| "loss": 0.3633, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.2613550626226044, | |
| "grad_norm": 0.6856644672766703, | |
| "learning_rate": 7.189961746777657e-06, | |
| "loss": 0.363, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 1.2673909763090387, | |
| "grad_norm": 0.6857005736783666, | |
| "learning_rate": 7.1583382976968295e-06, | |
| "loss": 0.3618, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.273426889995473, | |
| "grad_norm": 0.6166440607694041, | |
| "learning_rate": 7.126608380472642e-06, | |
| "loss": 0.3593, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 1.2794628036819073, | |
| "grad_norm": 0.6673854300030073, | |
| "learning_rate": 7.094773560302438e-06, | |
| "loss": 0.3616, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.2854987173683416, | |
| "grad_norm": 0.6261609808400934, | |
| "learning_rate": 7.062835407558295e-06, | |
| "loss": 0.3623, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 1.291534631054776, | |
| "grad_norm": 0.6573770008704372, | |
| "learning_rate": 7.030795497709559e-06, | |
| "loss": 0.3616, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.2975705447412103, | |
| "grad_norm": 0.63175357402283, | |
| "learning_rate": 6.99865541124513e-06, | |
| "loss": 0.363, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.3036064584276446, | |
| "grad_norm": 0.7095581591416922, | |
| "learning_rate": 6.9664167335954866e-06, | |
| "loss": 0.3604, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.3096423721140789, | |
| "grad_norm": 0.6211244267814455, | |
| "learning_rate": 6.9340810550545004e-06, | |
| "loss": 0.3584, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 1.3156782858005132, | |
| "grad_norm": 0.6411383893721285, | |
| "learning_rate": 6.901649970700966e-06, | |
| "loss": 0.3616, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.3217141994869475, | |
| "grad_norm": 0.6508872294411808, | |
| "learning_rate": 6.869125080319934e-06, | |
| "loss": 0.3626, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 1.3277501131733815, | |
| "grad_norm": 0.6456129899609592, | |
| "learning_rate": 6.836507988323785e-06, | |
| "loss": 0.3612, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.3337860268598158, | |
| "grad_norm": 0.6885055595324049, | |
| "learning_rate": 6.803800303673096e-06, | |
| "loss": 0.3588, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 1.3398219405462501, | |
| "grad_norm": 0.6841559054058574, | |
| "learning_rate": 6.77100363979726e-06, | |
| "loss": 0.3608, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.3458578542326844, | |
| "grad_norm": 0.7229876827512576, | |
| "learning_rate": 6.738119614514913e-06, | |
| "loss": 0.3655, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.3518937679191187, | |
| "grad_norm": 0.6235312062043321, | |
| "learning_rate": 6.705149849954116e-06, | |
| "loss": 0.3607, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.357929681605553, | |
| "grad_norm": 0.6372979896414575, | |
| "learning_rate": 6.672095972472339e-06, | |
| "loss": 0.3613, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.3639655952919874, | |
| "grad_norm": 0.5943237749223176, | |
| "learning_rate": 6.638959612576243e-06, | |
| "loss": 0.3578, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.3700015089784217, | |
| "grad_norm": 0.6331473442190148, | |
| "learning_rate": 6.605742404841241e-06, | |
| "loss": 0.3606, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.376037422664856, | |
| "grad_norm": 0.6352200712052698, | |
| "learning_rate": 6.572445987830869e-06, | |
| "loss": 0.3602, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.38207333635129, | |
| "grad_norm": 0.6315011206585134, | |
| "learning_rate": 6.539072004015962e-06, | |
| "loss": 0.3585, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.3881092500377243, | |
| "grad_norm": 0.672467399271792, | |
| "learning_rate": 6.505622099693624e-06, | |
| "loss": 0.359, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.3941451637241586, | |
| "grad_norm": 0.6540330679200106, | |
| "learning_rate": 6.4720979249060245e-06, | |
| "loss": 0.357, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.400181077410593, | |
| "grad_norm": 0.6296334356002367, | |
| "learning_rate": 6.438501133359006e-06, | |
| "loss": 0.363, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.4062169910970272, | |
| "grad_norm": 0.5755292937597596, | |
| "learning_rate": 6.404833382340498e-06, | |
| "loss": 0.3579, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.4122529047834615, | |
| "grad_norm": 0.6273216809842853, | |
| "learning_rate": 6.3710963326387845e-06, | |
| "loss": 0.361, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.4182888184698959, | |
| "grad_norm": 0.659504858020357, | |
| "learning_rate": 6.337291648460554e-06, | |
| "loss": 0.3648, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.4243247321563302, | |
| "grad_norm": 0.646430703430766, | |
| "learning_rate": 6.303420997348828e-06, | |
| "loss": 0.3609, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.4303606458427645, | |
| "grad_norm": 0.70677217944382, | |
| "learning_rate": 6.269486050100692e-06, | |
| "loss": 0.3583, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.4363965595291988, | |
| "grad_norm": 0.6982928562021034, | |
| "learning_rate": 6.2354884806848825e-06, | |
| "loss": 0.3587, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.442432473215633, | |
| "grad_norm": 0.635748838083391, | |
| "learning_rate": 6.201429966159203e-06, | |
| "loss": 0.3603, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.4484683869020674, | |
| "grad_norm": 0.6591941857655591, | |
| "learning_rate": 6.167312186587813e-06, | |
| "loss": 0.3587, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.4545043005885017, | |
| "grad_norm": 0.6513018732706167, | |
| "learning_rate": 6.133136824958334e-06, | |
| "loss": 0.3583, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.460540214274936, | |
| "grad_norm": 0.6895727383237782, | |
| "learning_rate": 6.098905567098846e-06, | |
| "loss": 0.3638, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.4665761279613703, | |
| "grad_norm": 0.6281650394691185, | |
| "learning_rate": 6.064620101594715e-06, | |
| "loss": 0.3629, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.4726120416478046, | |
| "grad_norm": 0.7324490252015554, | |
| "learning_rate": 6.030282119705306e-06, | |
| "loss": 0.3621, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.4786479553342387, | |
| "grad_norm": 0.6803933740478001, | |
| "learning_rate": 5.99589331528055e-06, | |
| "loss": 0.3613, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.484683869020673, | |
| "grad_norm": 0.6535344969186776, | |
| "learning_rate": 5.961455384677393e-06, | |
| "loss": 0.3588, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.4907197827071073, | |
| "grad_norm": 0.6220530519094237, | |
| "learning_rate": 5.92697002667611e-06, | |
| "loss": 0.3614, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.4967556963935416, | |
| "grad_norm": 0.5997735782443615, | |
| "learning_rate": 5.892438942396515e-06, | |
| "loss": 0.3562, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.5027916100799759, | |
| "grad_norm": 0.5881600037112182, | |
| "learning_rate": 5.857863835214041e-06, | |
| "loss": 0.36, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.5088275237664102, | |
| "grad_norm": 0.6301732957095514, | |
| "learning_rate": 5.823246410675714e-06, | |
| "loss": 0.3602, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.5148634374528445, | |
| "grad_norm": 0.6369138058336548, | |
| "learning_rate": 5.788588376416026e-06, | |
| "loss": 0.3575, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.5208993511392785, | |
| "grad_norm": 1.8916358390305654, | |
| "learning_rate": 5.753891442072693e-06, | |
| "loss": 0.3584, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.5269352648257128, | |
| "grad_norm": 0.6400402583906231, | |
| "learning_rate": 5.719157319202325e-06, | |
| "loss": 0.3539, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.5329711785121471, | |
| "grad_norm": 0.6223661041265537, | |
| "learning_rate": 5.684387721195997e-06, | |
| "loss": 0.3595, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.5390070921985815, | |
| "grad_norm": 0.6649761362975228, | |
| "learning_rate": 5.649584363194725e-06, | |
| "loss": 0.36, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.5450430058850158, | |
| "grad_norm": 0.5989851062495032, | |
| "learning_rate": 5.6147489620048655e-06, | |
| "loss": 0.3582, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.55107891957145, | |
| "grad_norm": 0.6435791376898407, | |
| "learning_rate": 5.579883236013429e-06, | |
| "loss": 0.3559, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.5571148332578844, | |
| "grad_norm": 0.5973586913854247, | |
| "learning_rate": 5.544988905103304e-06, | |
| "loss": 0.3581, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.5631507469443187, | |
| "grad_norm": 0.6331916860819433, | |
| "learning_rate": 5.510067690568429e-06, | |
| "loss": 0.3573, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.569186660630753, | |
| "grad_norm": 0.6000249694556851, | |
| "learning_rate": 5.475121315028876e-06, | |
| "loss": 0.3574, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.5752225743171873, | |
| "grad_norm": 0.5919987411148389, | |
| "learning_rate": 5.4401515023458805e-06, | |
| "loss": 0.3622, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.5812584880036216, | |
| "grad_norm": 0.6130160505042299, | |
| "learning_rate": 5.4051599775368e-06, | |
| "loss": 0.3585, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.5872944016900559, | |
| "grad_norm": 0.6196465067482942, | |
| "learning_rate": 5.370148466690026e-06, | |
| "loss": 0.3524, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.5933303153764902, | |
| "grad_norm": 0.6396523422153624, | |
| "learning_rate": 5.335118696879836e-06, | |
| "loss": 0.3584, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.5993662290629245, | |
| "grad_norm": 0.6247037129381725, | |
| "learning_rate": 5.3000723960812e-06, | |
| "loss": 0.358, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.6054021427493588, | |
| "grad_norm": 0.6296280096461855, | |
| "learning_rate": 5.265011293084539e-06, | |
| "loss": 0.3557, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.611438056435793, | |
| "grad_norm": 0.6270649643037325, | |
| "learning_rate": 5.2299371174104505e-06, | |
| "loss": 0.3586, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.6174739701222274, | |
| "grad_norm": 0.6724245016825049, | |
| "learning_rate": 5.194851599224392e-06, | |
| "loss": 0.3563, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.6235098838086617, | |
| "grad_norm": 0.6246722692854128, | |
| "learning_rate": 5.159756469251327e-06, | |
| "loss": 0.3587, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.629545797495096, | |
| "grad_norm": 0.5856892553580461, | |
| "learning_rate": 5.1246534586903655e-06, | |
| "loss": 0.3538, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.63558171118153, | |
| "grad_norm": 0.6199649535926036, | |
| "learning_rate": 5.089544299129349e-06, | |
| "loss": 0.3552, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.6416176248679644, | |
| "grad_norm": 0.6395106688159933, | |
| "learning_rate": 5.054430722459442e-06, | |
| "loss": 0.3575, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.6476535385543987, | |
| "grad_norm": 0.6217763272730691, | |
| "learning_rate": 5.019314460789708e-06, | |
| "loss": 0.3568, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.653689452240833, | |
| "grad_norm": 0.6159996290026578, | |
| "learning_rate": 4.984197246361649e-06, | |
| "loss": 0.3565, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.6597253659272673, | |
| "grad_norm": 0.6021051813495957, | |
| "learning_rate": 4.949080811463767e-06, | |
| "loss": 0.3577, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.6657612796137016, | |
| "grad_norm": 0.6102206368388114, | |
| "learning_rate": 4.913966888346118e-06, | |
| "loss": 0.3556, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.6717971933001357, | |
| "grad_norm": 0.5968837038838994, | |
| "learning_rate": 4.8788572091348435e-06, | |
| "loss": 0.3581, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.67783310698657, | |
| "grad_norm": 0.5981355700097328, | |
| "learning_rate": 4.843753505746748e-06, | |
| "loss": 0.358, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.6838690206730043, | |
| "grad_norm": 0.6567740858768865, | |
| "learning_rate": 4.8086575098038505e-06, | |
| "loss": 0.3573, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.6899049343594386, | |
| "grad_norm": 0.6773288375423023, | |
| "learning_rate": 4.773570952547975e-06, | |
| "loss": 0.3552, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.6959408480458729, | |
| "grad_norm": 0.6202686068367487, | |
| "learning_rate": 4.738495564755345e-06, | |
| "loss": 0.3547, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.7019767617323072, | |
| "grad_norm": 0.5595337919079114, | |
| "learning_rate": 4.703433076651205e-06, | |
| "loss": 0.353, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.7080126754187415, | |
| "grad_norm": 0.6583890978208258, | |
| "learning_rate": 4.668385217824482e-06, | |
| "loss": 0.3583, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.7140485891051758, | |
| "grad_norm": 0.5898922057879373, | |
| "learning_rate": 4.633353717142448e-06, | |
| "loss": 0.3524, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.72008450279161, | |
| "grad_norm": 0.5938698503556435, | |
| "learning_rate": 4.5983403026654625e-06, | |
| "loss": 0.3554, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.7261204164780444, | |
| "grad_norm": 0.632653867195755, | |
| "learning_rate": 4.563346701561699e-06, | |
| "loss": 0.3535, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.7321563301644787, | |
| "grad_norm": 0.634481958151908, | |
| "learning_rate": 4.528374640021975e-06, | |
| "loss": 0.3548, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.738192243850913, | |
| "grad_norm": 0.6554591212571549, | |
| "learning_rate": 4.493425843174581e-06, | |
| "loss": 0.3523, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.7442281575373473, | |
| "grad_norm": 0.639030241328894, | |
| "learning_rate": 4.4585020350001885e-06, | |
| "loss": 0.3571, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.7502640712237816, | |
| "grad_norm": 0.579081823243162, | |
| "learning_rate": 4.423604938246815e-06, | |
| "loss": 0.358, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.7562999849102159, | |
| "grad_norm": 0.5786332593667859, | |
| "learning_rate": 4.38873627434483e-06, | |
| "loss": 0.3546, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.7623358985966502, | |
| "grad_norm": 0.5844630643462843, | |
| "learning_rate": 4.353897763322053e-06, | |
| "loss": 0.3557, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.7683718122830845, | |
| "grad_norm": 0.6362540824300466, | |
| "learning_rate": 4.319091123718891e-06, | |
| "loss": 0.3577, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.7744077259695188, | |
| "grad_norm": 0.6152238906869951, | |
| "learning_rate": 4.284318072503581e-06, | |
| "loss": 0.3558, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.7804436396559529, | |
| "grad_norm": 0.5871415463947245, | |
| "learning_rate": 4.249580324987482e-06, | |
| "loss": 0.3565, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.7864795533423872, | |
| "grad_norm": 0.5894304003956816, | |
| "learning_rate": 4.2148795947404664e-06, | |
| "loss": 0.3548, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.7925154670288215, | |
| "grad_norm": 0.5546376741165042, | |
| "learning_rate": 4.180217593506394e-06, | |
| "loss": 0.3545, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.7985513807152558, | |
| "grad_norm": 0.5882950021870835, | |
| "learning_rate": 4.1455960311186645e-06, | |
| "loss": 0.3578, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.80458729440169, | |
| "grad_norm": 0.6581353476419389, | |
| "learning_rate": 4.111016615415887e-06, | |
| "loss": 0.3545, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.8106232080881244, | |
| "grad_norm": 0.728199708802779, | |
| "learning_rate": 4.076481052157621e-06, | |
| "loss": 0.3567, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.8166591217745585, | |
| "grad_norm": 0.5836951966903218, | |
| "learning_rate": 4.0419910449402385e-06, | |
| "loss": 0.3541, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.8226950354609928, | |
| "grad_norm": 0.583825208842142, | |
| "learning_rate": 4.0075482951128965e-06, | |
| "loss": 0.3557, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.828730949147427, | |
| "grad_norm": 0.627394077298899, | |
| "learning_rate": 3.973154501693597e-06, | |
| "loss": 0.352, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.8347668628338614, | |
| "grad_norm": 0.6500394437203815, | |
| "learning_rate": 3.938811361285386e-06, | |
| "loss": 0.3543, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.8408027765202957, | |
| "grad_norm": 0.5787408936785984, | |
| "learning_rate": 3.904520567992655e-06, | |
| "loss": 0.3539, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.84683869020673, | |
| "grad_norm": 0.6006488260082842, | |
| "learning_rate": 3.870283813337587e-06, | |
| "loss": 0.3534, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.8528746038931643, | |
| "grad_norm": 0.6017706438925717, | |
| "learning_rate": 3.836102786176697e-06, | |
| "loss": 0.3533, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.8589105175795986, | |
| "grad_norm": 0.6160731963284618, | |
| "learning_rate": 3.8019791726175353e-06, | |
| "loss": 0.3537, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.8649464312660329, | |
| "grad_norm": 0.7394723530516694, | |
| "learning_rate": 3.767914655935513e-06, | |
| "loss": 0.3512, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.8709823449524672, | |
| "grad_norm": 0.5969802619046902, | |
| "learning_rate": 3.73391091649086e-06, | |
| "loss": 0.3514, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.8770182586389015, | |
| "grad_norm": 0.6434909203687009, | |
| "learning_rate": 3.6999696316457468e-06, | |
| "loss": 0.3525, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.8830541723253358, | |
| "grad_norm": 0.6185839002292769, | |
| "learning_rate": 3.6660924756815314e-06, | |
| "loss": 0.3516, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.88909008601177, | |
| "grad_norm": 0.5764246370880874, | |
| "learning_rate": 3.63228111971618e-06, | |
| "loss": 0.3543, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.8951259996982044, | |
| "grad_norm": 0.5724269342695871, | |
| "learning_rate": 3.5985372316218187e-06, | |
| "loss": 0.3524, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.9011619133846387, | |
| "grad_norm": 0.5893980753783277, | |
| "learning_rate": 3.5648624759424723e-06, | |
| "loss": 0.3487, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.907197827071073, | |
| "grad_norm": 0.6385286384600478, | |
| "learning_rate": 3.5312585138119503e-06, | |
| "loss": 0.353, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.9132337407575073, | |
| "grad_norm": 0.643587632906283, | |
| "learning_rate": 3.4977270028719013e-06, | |
| "loss": 0.3498, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.9192696544439416, | |
| "grad_norm": 0.6189874783125575, | |
| "learning_rate": 3.4642695971900506e-06, | |
| "loss": 0.3542, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.925305568130376, | |
| "grad_norm": 0.6320316722606764, | |
| "learning_rate": 3.4308879471785986e-06, | |
| "loss": 0.3523, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.93134148181681, | |
| "grad_norm": 0.6715762862677156, | |
| "learning_rate": 3.3975836995128176e-06, | |
| "loss": 0.3505, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.9373773955032443, | |
| "grad_norm": 0.5947951437286136, | |
| "learning_rate": 3.3643584970498166e-06, | |
| "loss": 0.356, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.9434133091896786, | |
| "grad_norm": 0.5953138896683005, | |
| "learning_rate": 3.3312139787474986e-06, | |
| "loss": 0.3552, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.9494492228761129, | |
| "grad_norm": 0.5696476474991146, | |
| "learning_rate": 3.298151779583725e-06, | |
| "loss": 0.3496, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.9554851365625472, | |
| "grad_norm": 0.6131972032987533, | |
| "learning_rate": 3.2651735304756505e-06, | |
| "loss": 0.3536, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.9615210502489815, | |
| "grad_norm": 0.6336317988993604, | |
| "learning_rate": 3.2322808581992825e-06, | |
| "loss": 0.3563, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.9675569639354156, | |
| "grad_norm": 0.6341579320490388, | |
| "learning_rate": 3.1994753853092284e-06, | |
| "loss": 0.3482, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.9735928776218499, | |
| "grad_norm": 0.5954681993221721, | |
| "learning_rate": 3.166758730058653e-06, | |
| "loss": 0.3518, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.9796287913082842, | |
| "grad_norm": 0.5893599270303087, | |
| "learning_rate": 3.134132506319467e-06, | |
| "loss": 0.3536, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.9856647049947185, | |
| "grad_norm": 0.5689301232875419, | |
| "learning_rate": 3.101598323502698e-06, | |
| "loss": 0.3537, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.9917006186811528, | |
| "grad_norm": 0.6116819898452338, | |
| "learning_rate": 3.0691577864791176e-06, | |
| "loss": 0.3515, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.997736532367587, | |
| "grad_norm": 0.5926997741101551, | |
| "learning_rate": 3.036812495500058e-06, | |
| "loss": 0.3504, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 2.0036215482118607, | |
| "grad_norm": 0.5928785278377309, | |
| "learning_rate": 3.0045640461184917e-06, | |
| "loss": 0.339, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.009657461898295, | |
| "grad_norm": 0.6039984062866832, | |
| "learning_rate": 2.97241402911031e-06, | |
| "loss": 0.3325, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 2.0156933755847293, | |
| "grad_norm": 0.6671960610879556, | |
| "learning_rate": 2.940364030395856e-06, | |
| "loss": 0.3284, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.0217292892711636, | |
| "grad_norm": 0.5808483500966948, | |
| "learning_rate": 2.908415630961702e-06, | |
| "loss": 0.3265, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.027765202957598, | |
| "grad_norm": 0.6017580883286716, | |
| "learning_rate": 2.876570406782645e-06, | |
| "loss": 0.3296, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 2.033801116644032, | |
| "grad_norm": 0.6067555273933171, | |
| "learning_rate": 2.844829928743987e-06, | |
| "loss": 0.3315, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 2.0398370303304665, | |
| "grad_norm": 0.5774545226545359, | |
| "learning_rate": 2.813195762564018e-06, | |
| "loss": 0.3268, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 2.0458729440169003, | |
| "grad_norm": 0.5888748284507602, | |
| "learning_rate": 2.781669468716811e-06, | |
| "loss": 0.3292, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 2.0519088577033346, | |
| "grad_norm": 0.6137376399757654, | |
| "learning_rate": 2.7502526023552227e-06, | |
| "loss": 0.3258, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.057944771389769, | |
| "grad_norm": 0.59390579398881, | |
| "learning_rate": 2.718946713234185e-06, | |
| "loss": 0.3295, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 2.0639806850762032, | |
| "grad_norm": 0.6555105104152712, | |
| "learning_rate": 2.6877533456342714e-06, | |
| "loss": 0.3301, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 2.0700165987626375, | |
| "grad_norm": 0.6048063575727766, | |
| "learning_rate": 2.6566740382855005e-06, | |
| "loss": 0.3289, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 2.076052512449072, | |
| "grad_norm": 0.6014841818951663, | |
| "learning_rate": 2.625710324291442e-06, | |
| "loss": 0.3325, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 2.082088426135506, | |
| "grad_norm": 0.6035697169885135, | |
| "learning_rate": 2.5948637310535886e-06, | |
| "loss": 0.3296, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.0881243398219405, | |
| "grad_norm": 0.6112233467387164, | |
| "learning_rate": 2.5641357801960186e-06, | |
| "loss": 0.3278, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 2.0941602535083748, | |
| "grad_norm": 0.5870217829586826, | |
| "learning_rate": 2.5335279874903185e-06, | |
| "loss": 0.3313, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 2.100196167194809, | |
| "grad_norm": 0.5897131296840935, | |
| "learning_rate": 2.503041862780827e-06, | |
| "loss": 0.3296, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 2.1062320808812434, | |
| "grad_norm": 0.5718259687035243, | |
| "learning_rate": 2.47267890991016e-06, | |
| "loss": 0.3281, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 2.1122679945676777, | |
| "grad_norm": 0.5777856500315681, | |
| "learning_rate": 2.4424406266450045e-06, | |
| "loss": 0.3296, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.118303908254112, | |
| "grad_norm": 0.6262457739159312, | |
| "learning_rate": 2.412328504602264e-06, | |
| "loss": 0.3336, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 2.1243398219405463, | |
| "grad_norm": 0.589194023665236, | |
| "learning_rate": 2.382344029175462e-06, | |
| "loss": 0.3349, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 2.1303757356269806, | |
| "grad_norm": 0.6140628916832596, | |
| "learning_rate": 2.3524886794614653e-06, | |
| "loss": 0.331, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 2.136411649313415, | |
| "grad_norm": 0.6028871935735021, | |
| "learning_rate": 2.322763928187543e-06, | |
| "loss": 0.3307, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 2.142447562999849, | |
| "grad_norm": 0.5798390235554982, | |
| "learning_rate": 2.293171241638698e-06, | |
| "loss": 0.3298, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.1484834766862835, | |
| "grad_norm": 0.5950496656474389, | |
| "learning_rate": 2.263712079585345e-06, | |
| "loss": 0.3305, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 2.154519390372718, | |
| "grad_norm": 0.5926734664470145, | |
| "learning_rate": 2.2343878952113012e-06, | |
| "loss": 0.3276, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 2.160555304059152, | |
| "grad_norm": 0.5877698580097848, | |
| "learning_rate": 2.2052001350421096e-06, | |
| "loss": 0.3268, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 2.1665912177455864, | |
| "grad_norm": 0.5888247000199527, | |
| "learning_rate": 2.1761502388736655e-06, | |
| "loss": 0.3327, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 2.1726271314320207, | |
| "grad_norm": 0.5807991121980183, | |
| "learning_rate": 2.14723963970121e-06, | |
| "loss": 0.3315, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.178663045118455, | |
| "grad_norm": 0.5763459777490838, | |
| "learning_rate": 2.118469763648643e-06, | |
| "loss": 0.3278, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 2.1846989588048893, | |
| "grad_norm": 0.5588744726618396, | |
| "learning_rate": 2.0898420298981537e-06, | |
| "loss": 0.3296, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 2.1907348724913236, | |
| "grad_norm": 0.6040859182215225, | |
| "learning_rate": 2.061357850620243e-06, | |
| "loss": 0.3279, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 2.1967707861777575, | |
| "grad_norm": 0.6083091005217864, | |
| "learning_rate": 2.0330186309040394e-06, | |
| "loss": 0.3298, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 2.2028066998641918, | |
| "grad_norm": 0.568667447432841, | |
| "learning_rate": 2.0048257686879997e-06, | |
| "loss": 0.3286, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.208842613550626, | |
| "grad_norm": 0.586169393672314, | |
| "learning_rate": 1.9767806546909457e-06, | |
| "loss": 0.3316, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 2.2148785272370604, | |
| "grad_norm": 0.5855668928973393, | |
| "learning_rate": 1.9488846723434646e-06, | |
| "loss": 0.3262, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 2.2209144409234947, | |
| "grad_norm": 0.5920501956876788, | |
| "learning_rate": 1.921139197719664e-06, | |
| "loss": 0.3298, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 2.226950354609929, | |
| "grad_norm": 0.6343784219115092, | |
| "learning_rate": 1.893545599469292e-06, | |
| "loss": 0.3316, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 2.2329862682963633, | |
| "grad_norm": 0.56167618088226, | |
| "learning_rate": 1.86610523875023e-06, | |
| "loss": 0.3288, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.2390221819827976, | |
| "grad_norm": 0.5937195941687996, | |
| "learning_rate": 1.8388194691613308e-06, | |
| "loss": 0.3285, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 2.245058095669232, | |
| "grad_norm": 0.6068056100462802, | |
| "learning_rate": 1.811689636675672e-06, | |
| "loss": 0.3295, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 2.251094009355666, | |
| "grad_norm": 0.5998463693882512, | |
| "learning_rate": 1.7847170795741414e-06, | |
| "loss": 0.33, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 2.2571299230421005, | |
| "grad_norm": 0.5707846476820784, | |
| "learning_rate": 1.7579031283794234e-06, | |
| "loss": 0.3324, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 2.2631658367285348, | |
| "grad_norm": 0.6070101386107148, | |
| "learning_rate": 1.7312491057903808e-06, | |
| "loss": 0.3288, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.269201750414969, | |
| "grad_norm": 0.5684370763425239, | |
| "learning_rate": 1.7047563266167888e-06, | |
| "loss": 0.3291, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 2.2752376641014034, | |
| "grad_norm": 0.5367883177519198, | |
| "learning_rate": 1.678426097714489e-06, | |
| "loss": 0.3265, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 2.2812735777878377, | |
| "grad_norm": 0.5853244396608877, | |
| "learning_rate": 1.6522597179209187e-06, | |
| "loss": 0.3259, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 2.287309491474272, | |
| "grad_norm": 0.5641343283784108, | |
| "learning_rate": 1.6262584779910472e-06, | |
| "loss": 0.3286, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 2.2933454051607063, | |
| "grad_norm": 0.5563209895809159, | |
| "learning_rate": 1.600423660533692e-06, | |
| "loss": 0.3281, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.2993813188471406, | |
| "grad_norm": 0.5805361043294971, | |
| "learning_rate": 1.5747565399482605e-06, | |
| "loss": 0.3299, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 2.305417232533575, | |
| "grad_norm": 0.5811803574606669, | |
| "learning_rate": 1.5492583823618878e-06, | |
| "loss": 0.3289, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 2.311453146220009, | |
| "grad_norm": 0.6040233888147246, | |
| "learning_rate": 1.523930445566963e-06, | |
| "loss": 0.3308, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 2.3174890599064435, | |
| "grad_norm": 0.6059976475921155, | |
| "learning_rate": 1.4987739789591056e-06, | |
| "loss": 0.3294, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 2.323524973592878, | |
| "grad_norm": 0.5905386095910952, | |
| "learning_rate": 1.4737902234755203e-06, | |
| "loss": 0.3301, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.329560887279312, | |
| "grad_norm": 0.5747067002149818, | |
| "learning_rate": 1.448980411533782e-06, | |
| "loss": 0.3278, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 2.335596800965746, | |
| "grad_norm": 0.5732211405787891, | |
| "learning_rate": 1.4243457669710564e-06, | |
| "loss": 0.3245, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 2.3416327146521807, | |
| "grad_norm": 0.6079651710560006, | |
| "learning_rate": 1.3998875049837141e-06, | |
| "loss": 0.3268, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 2.3476686283386146, | |
| "grad_norm": 0.5783578941572416, | |
| "learning_rate": 1.3756068320673938e-06, | |
| "loss": 0.3283, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 2.353704542025049, | |
| "grad_norm": 0.5532376575030373, | |
| "learning_rate": 1.3515049459574847e-06, | |
| "loss": 0.3254, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.359740455711483, | |
| "grad_norm": 0.5467274114487632, | |
| "learning_rate": 1.3275830355700519e-06, | |
| "loss": 0.3257, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 2.3657763693979175, | |
| "grad_norm": 0.5922264462167515, | |
| "learning_rate": 1.3038422809431733e-06, | |
| "loss": 0.3291, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 2.3718122830843518, | |
| "grad_norm": 0.5807751637804499, | |
| "learning_rate": 1.280283853178742e-06, | |
| "loss": 0.3281, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 2.377848196770786, | |
| "grad_norm": 0.5751202261036737, | |
| "learning_rate": 1.256908914384698e-06, | |
| "loss": 0.3321, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 2.3838841104572204, | |
| "grad_norm": 0.5829573981972134, | |
| "learning_rate": 1.233718617617689e-06, | |
| "loss": 0.3303, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.3899200241436547, | |
| "grad_norm": 0.5614143554083199, | |
| "learning_rate": 1.2107141068262119e-06, | |
| "loss": 0.3276, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 2.395955937830089, | |
| "grad_norm": 0.5657826082869326, | |
| "learning_rate": 1.1878965167941658e-06, | |
| "loss": 0.3279, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 2.4019918515165233, | |
| "grad_norm": 0.5583977788315128, | |
| "learning_rate": 1.1652669730848837e-06, | |
| "loss": 0.3259, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 2.4080277652029576, | |
| "grad_norm": 0.5670227130617606, | |
| "learning_rate": 1.1428265919856057e-06, | |
| "loss": 0.3319, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 2.414063678889392, | |
| "grad_norm": 0.5345020446470288, | |
| "learning_rate": 1.1205764804524172e-06, | |
| "loss": 0.3258, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.420099592575826, | |
| "grad_norm": 0.5742530447532448, | |
| "learning_rate": 1.0985177360556421e-06, | |
| "loss": 0.3281, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 2.4261355062622605, | |
| "grad_norm": 0.5681633515485598, | |
| "learning_rate": 1.0766514469257006e-06, | |
| "loss": 0.33, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 2.432171419948695, | |
| "grad_norm": 0.5469547021834809, | |
| "learning_rate": 1.0549786916994387e-06, | |
| "loss": 0.3271, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 2.438207333635129, | |
| "grad_norm": 0.5467836338693935, | |
| "learning_rate": 1.0335005394669062e-06, | |
| "loss": 0.3282, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 2.4442432473215634, | |
| "grad_norm": 0.5496370736783344, | |
| "learning_rate": 1.012218049718639e-06, | |
| "loss": 0.3267, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.4502791610079977, | |
| "grad_norm": 0.5532695447765059, | |
| "learning_rate": 9.911322722933825e-07, | |
| "loss": 0.3267, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 2.456315074694432, | |
| "grad_norm": 0.5593061519759683, | |
| "learning_rate": 9.702442473263035e-07, | |
| "loss": 0.3261, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 2.4623509883808663, | |
| "grad_norm": 0.5675718980431652, | |
| "learning_rate": 9.495550051976937e-07, | |
| "loss": 0.33, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 2.4683869020673006, | |
| "grad_norm": 0.55036807877547, | |
| "learning_rate": 9.290655664821296e-07, | |
| "loss": 0.326, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 2.474422815753735, | |
| "grad_norm": 0.6047393707132771, | |
| "learning_rate": 9.087769418981352e-07, | |
| "loss": 0.3294, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.480458729440169, | |
| "grad_norm": 0.5377856224781872, | |
| "learning_rate": 8.88690132258323e-07, | |
| "loss": 0.3301, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 2.486494643126603, | |
| "grad_norm": 0.5404023215833121, | |
| "learning_rate": 8.688061284200266e-07, | |
| "loss": 0.3308, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 2.492530556813038, | |
| "grad_norm": 0.5429038964087051, | |
| "learning_rate": 8.491259112364192e-07, | |
| "loss": 0.3277, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 2.4985664704994717, | |
| "grad_norm": 0.5556392061166345, | |
| "learning_rate": 8.296504515081333e-07, | |
| "loss": 0.328, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 2.5046023841859064, | |
| "grad_norm": 0.5550852188468128, | |
| "learning_rate": 8.103807099353733e-07, | |
| "loss": 0.3303, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.5106382978723403, | |
| "grad_norm": 0.5683960534884703, | |
| "learning_rate": 7.913176370705166e-07, | |
| "loss": 0.3303, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 2.5166742115587746, | |
| "grad_norm": 0.5647058376594801, | |
| "learning_rate": 7.724621732712373e-07, | |
| "loss": 0.3281, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 2.522710125245209, | |
| "grad_norm": 0.5396463872633352, | |
| "learning_rate": 7.538152486541078e-07, | |
| "loss": 0.3224, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 2.528746038931643, | |
| "grad_norm": 0.5769965957501234, | |
| "learning_rate": 7.353777830487247e-07, | |
| "loss": 0.3298, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 2.5347819526180775, | |
| "grad_norm": 0.5617546845646423, | |
| "learning_rate": 7.171506859523298e-07, | |
| "loss": 0.3284, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.540817866304512, | |
| "grad_norm": 0.5370456459767287, | |
| "learning_rate": 6.991348564849504e-07, | |
| "loss": 0.3272, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 2.546853779990946, | |
| "grad_norm": 0.5449920129863155, | |
| "learning_rate": 6.813311833450426e-07, | |
| "loss": 0.3244, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 2.5528896936773804, | |
| "grad_norm": 0.5814796250543772, | |
| "learning_rate": 6.637405447656542e-07, | |
| "loss": 0.3286, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 2.5589256073638147, | |
| "grad_norm": 0.5802300234417045, | |
| "learning_rate": 6.463638084711088e-07, | |
| "loss": 0.3303, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 2.564961521050249, | |
| "grad_norm": 0.5682016106324166, | |
| "learning_rate": 6.29201831634188e-07, | |
| "loss": 0.3275, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.5709974347366833, | |
| "grad_norm": 0.628799960343276, | |
| "learning_rate": 6.122554608338605e-07, | |
| "loss": 0.3278, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 2.5770333484231176, | |
| "grad_norm": 0.5261749879449605, | |
| "learning_rate": 5.955255320135195e-07, | |
| "loss": 0.3287, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 2.583069262109552, | |
| "grad_norm": 0.5365103226953842, | |
| "learning_rate": 5.790128704397424e-07, | |
| "loss": 0.3242, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 2.589105175795986, | |
| "grad_norm": 0.5482210552849281, | |
| "learning_rate": 5.627182906615825e-07, | |
| "loss": 0.3254, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 2.5951410894824205, | |
| "grad_norm": 0.5270093070193902, | |
| "learning_rate": 5.466425964703914e-07, | |
| "loss": 0.3268, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.601177003168855, | |
| "grad_norm": 0.5351843851712077, | |
| "learning_rate": 5.307865808601664e-07, | |
| "loss": 0.3267, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 2.607212916855289, | |
| "grad_norm": 0.5551045883829538, | |
| "learning_rate": 5.151510259884329e-07, | |
| "loss": 0.3261, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 2.6132488305417234, | |
| "grad_norm": 0.5716515174477422, | |
| "learning_rate": 4.997367031376627e-07, | |
| "loss": 0.3283, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 2.6192847442281577, | |
| "grad_norm": 0.5484469831279773, | |
| "learning_rate": 4.84544372677228e-07, | |
| "loss": 0.3279, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 2.6253206579145916, | |
| "grad_norm": 0.5627722024643765, | |
| "learning_rate": 4.6957478402589076e-07, | |
| "loss": 0.3285, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.6313565716010263, | |
| "grad_norm": 0.54030007506572, | |
| "learning_rate": 4.548286756148401e-07, | |
| "loss": 0.328, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 2.63739248528746, | |
| "grad_norm": 0.5688872966757411, | |
| "learning_rate": 4.4030677485125906e-07, | |
| "loss": 0.3291, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 2.643428398973895, | |
| "grad_norm": 0.5611453338620043, | |
| "learning_rate": 4.2600979808244627e-07, | |
| "loss": 0.3267, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 2.649464312660329, | |
| "grad_norm": 0.5591585705521456, | |
| "learning_rate": 4.119384505604834e-07, | |
| "loss": 0.3285, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 2.655500226346763, | |
| "grad_norm": 0.5403567309346599, | |
| "learning_rate": 3.980934264074393e-07, | |
| "loss": 0.3234, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.6615361400331974, | |
| "grad_norm": 0.5366841662024877, | |
| "learning_rate": 3.8447540858113197e-07, | |
| "loss": 0.3289, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 2.6675720537196317, | |
| "grad_norm": 0.5505493242335168, | |
| "learning_rate": 3.710850688414419e-07, | |
| "loss": 0.329, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 2.673607967406066, | |
| "grad_norm": 0.5572305600353893, | |
| "learning_rate": 3.579230677171702e-07, | |
| "loss": 0.326, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 2.6796438810925003, | |
| "grad_norm": 0.5320801899819191, | |
| "learning_rate": 3.4499005447346024e-07, | |
| "loss": 0.3272, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 2.6856797947789346, | |
| "grad_norm": 0.5621605698475473, | |
| "learning_rate": 3.32286667079767e-07, | |
| "loss": 0.3232, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 2.691715708465369, | |
| "grad_norm": 0.5535800034831663, | |
| "learning_rate": 3.1981353217838853e-07, | |
| "loss": 0.3267, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 2.697751622151803, | |
| "grad_norm": 0.5541989505631728, | |
| "learning_rate": 3.0757126505355284e-07, | |
| "loss": 0.3271, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 2.7037875358382375, | |
| "grad_norm": 0.554309743511386, | |
| "learning_rate": 2.9556046960106997e-07, | |
| "loss": 0.3275, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.709823449524672, | |
| "grad_norm": 0.5441084268121339, | |
| "learning_rate": 2.837817382985375e-07, | |
| "loss": 0.3265, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.715859363211106, | |
| "grad_norm": 0.5661752729331364, | |
| "learning_rate": 2.722356521761188e-07, | |
| "loss": 0.3251, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.7218952768975404, | |
| "grad_norm": 0.5284497315283775, | |
| "learning_rate": 2.6092278078788004e-07, | |
| "loss": 0.3249, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.7279311905839747, | |
| "grad_norm": 0.52978683625873, | |
| "learning_rate": 2.4984368218369305e-07, | |
| "loss": 0.3282, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.733967104270409, | |
| "grad_norm": 0.5435219044017648, | |
| "learning_rate": 2.389989028817108e-07, | |
| "loss": 0.3283, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.7400030179568433, | |
| "grad_norm": 0.5516780362582209, | |
| "learning_rate": 2.2838897784140612e-07, | |
| "loss": 0.3274, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.7460389316432776, | |
| "grad_norm": 0.546719555306795, | |
| "learning_rate": 2.1801443043718285e-07, | |
| "loss": 0.3298, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.752074845329712, | |
| "grad_norm": 0.5563980632574993, | |
| "learning_rate": 2.0787577243255807e-07, | |
| "loss": 0.3267, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.758110759016146, | |
| "grad_norm": 0.535638021015215, | |
| "learning_rate": 1.9797350395492077e-07, | |
| "loss": 0.3253, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.76414667270258, | |
| "grad_norm": 0.5347609121819951, | |
| "learning_rate": 1.8830811347085697e-07, | |
| "loss": 0.3252, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.770182586389015, | |
| "grad_norm": 0.5415863482391344, | |
| "learning_rate": 1.788800777620542e-07, | |
| "loss": 0.3276, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.7762185000754487, | |
| "grad_norm": 0.5466212280219622, | |
| "learning_rate": 1.6968986190178728e-07, | |
| "loss": 0.326, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.7822544137618834, | |
| "grad_norm": 0.5391843573715891, | |
| "learning_rate": 1.60737919231973e-07, | |
| "loss": 0.3265, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.7882903274483173, | |
| "grad_norm": 0.5465887305789703, | |
| "learning_rate": 1.5202469134080633e-07, | |
| "loss": 0.3291, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.794326241134752, | |
| "grad_norm": 0.5447449635613493, | |
| "learning_rate": 1.4355060804098043e-07, | |
| "loss": 0.3254, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.800362154821186, | |
| "grad_norm": 0.5376834372862567, | |
| "learning_rate": 1.3531608734848433e-07, | |
| "loss": 0.3252, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.80639806850762, | |
| "grad_norm": 0.5419447242645747, | |
| "learning_rate": 1.273215354619789e-07, | |
| "loss": 0.3277, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.8124339821940545, | |
| "grad_norm": 0.521436211709283, | |
| "learning_rate": 1.1956734674276492e-07, | |
| "loss": 0.3267, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.818469895880489, | |
| "grad_norm": 0.5443036316275357, | |
| "learning_rate": 1.1205390369532553e-07, | |
| "loss": 0.328, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.824505809566923, | |
| "grad_norm": 0.5736771187575125, | |
| "learning_rate": 1.0478157694846002e-07, | |
| "loss": 0.3269, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.8305417232533574, | |
| "grad_norm": 0.5533030963421177, | |
| "learning_rate": 9.775072523700135e-08, | |
| "loss": 0.3274, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.8365776369397917, | |
| "grad_norm": 0.5297867847542854, | |
| "learning_rate": 9.096169538411747e-08, | |
| "loss": 0.3251, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.842613550626226, | |
| "grad_norm": 0.5603590658940372, | |
| "learning_rate": 8.441482228420505e-08, | |
| "loss": 0.3261, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.8486494643126603, | |
| "grad_norm": 0.5648155137748375, | |
| "learning_rate": 7.81104288863721e-08, | |
| "loss": 0.3238, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.8546853779990946, | |
| "grad_norm": 0.5155233113764542, | |
| "learning_rate": 7.204882617850129e-08, | |
| "loss": 0.3284, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.860721291685529, | |
| "grad_norm": 0.5283055469638852, | |
| "learning_rate": 6.623031317191386e-08, | |
| "loss": 0.3243, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.866757205371963, | |
| "grad_norm": 0.5324757215458941, | |
| "learning_rate": 6.065517688661926e-08, | |
| "loss": 0.3266, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.8727931190583975, | |
| "grad_norm": 0.5270694862009192, | |
| "learning_rate": 5.532369233715418e-08, | |
| "loss": 0.3263, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.878829032744832, | |
| "grad_norm": 0.5184850936640313, | |
| "learning_rate": 5.02361225190201e-08, | |
| "loss": 0.325, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.884864946431266, | |
| "grad_norm": 0.5522807685327075, | |
| "learning_rate": 4.539271839570702e-08, | |
| "loss": 0.3303, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.8909008601177004, | |
| "grad_norm": 0.5833975533295399, | |
| "learning_rate": 4.079371888631667e-08, | |
| "loss": 0.3287, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.8969367738041347, | |
| "grad_norm": 0.5435014494666157, | |
| "learning_rate": 3.643935085377193e-08, | |
| "loss": 0.3291, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.902972687490569, | |
| "grad_norm": 0.5317951774661862, | |
| "learning_rate": 3.232982909363247e-08, | |
| "loss": 0.3302, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.9090086011770033, | |
| "grad_norm": 0.5470417295465569, | |
| "learning_rate": 2.8465356323494897e-08, | |
| "loss": 0.3293, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.915044514863437, | |
| "grad_norm": 0.5361189628769133, | |
| "learning_rate": 2.4846123172992953e-08, | |
| "loss": 0.3281, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.921080428549872, | |
| "grad_norm": 0.54177713240335, | |
| "learning_rate": 2.147230817439616e-08, | |
| "loss": 0.326, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.927116342236306, | |
| "grad_norm": 0.5323052126594137, | |
| "learning_rate": 1.834407775380187e-08, | |
| "loss": 0.3281, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.9331522559227405, | |
| "grad_norm": 0.5359903004283559, | |
| "learning_rate": 1.5461586222924596e-08, | |
| "loss": 0.3261, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.9391881696091744, | |
| "grad_norm": 0.5543427271655068, | |
| "learning_rate": 1.2824975771486558e-08, | |
| "loss": 0.3264, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.945224083295609, | |
| "grad_norm": 0.5384618149718552, | |
| "learning_rate": 1.0434376460201067e-08, | |
| "loss": 0.3271, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.951259996982043, | |
| "grad_norm": 0.5121547980752482, | |
| "learning_rate": 8.289906214358767e-09, | |
| "loss": 0.3252, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.9572959106684773, | |
| "grad_norm": 0.5126843579972032, | |
| "learning_rate": 6.391670818008955e-09, | |
| "loss": 0.3255, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.9633318243549116, | |
| "grad_norm": 0.5323524113852374, | |
| "learning_rate": 4.7397639087432e-09, | |
| "loss": 0.3267, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.969367738041346, | |
| "grad_norm": 0.5324354968490875, | |
| "learning_rate": 3.3342669730729303e-09, | |
| "loss": 0.3255, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.97540365172778, | |
| "grad_norm": 0.550825930999869, | |
| "learning_rate": 2.1752493424148647e-09, | |
| "loss": 0.328, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.9814395654142145, | |
| "grad_norm": 0.5289245768111625, | |
| "learning_rate": 1.2627681896670852e-09, | |
| "loss": 0.3265, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.987475479100649, | |
| "grad_norm": 0.5548931035545003, | |
| "learning_rate": 5.968685263885165e-10, | |
| "loss": 0.329, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.993511392787083, | |
| "grad_norm": 0.5268141741402684, | |
| "learning_rate": 1.7758320058236522e-10, | |
| "loss": 0.3264, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.9995473064735174, | |
| "grad_norm": 0.5394909098085136, | |
| "learning_rate": 4.932895071863009e-12, | |
| "loss": 0.3267, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 4971, | |
| "total_flos": 3906508525600768.0, | |
| "train_loss": 0.3714317911300974, | |
| "train_runtime": 271631.3463, | |
| "train_samples_per_second": 4.684, | |
| "train_steps_per_second": 0.018 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 4971, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3906508525600768.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |