Lanni-ni's picture
add remote code + model files
d0f2865 verified
{"step": 20971520, "pnorm/_forward_module.model.embeddings.weight": 72.05237579345703, "gnorm/_forward_module.model.embeddings.weight": 0.06038488447666168, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.969755172729492, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001855711336247623, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 5.1563520431518555, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.003524532774463296, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 5.182712554931641, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0035395559389144182, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.136830806732178, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04442397132515907, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.12950325012207, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04780397564172745, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.6290377974510193, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.007245807442814112, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.006396627519279718, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003113751532509923, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.037145614624023, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0009169657132588327, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 12.70312213897705, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0305657796561718, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 8.957891464233398, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.029606034979224205, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.993705749511719, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001568031613714993, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.175420761108398, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.003415534505620599, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.1748738288879395, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.003269094740971923, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.174013614654541, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05101334676146507, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.178831577301025, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10206922888755798, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.6649390459060669, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.021807961165905, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.006190520711243153, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018073201645165682, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.034954071044922, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014780916972085834, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 12.680872917175293, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0415254607796669, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 8.979910850524902, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04571067541837692, "pnorm/_forward_module.model.norm.weight": 16.01601791381836, "gnorm/_forward_module.model.norm.weight": 0.0625680461525917, "pnorm/_forward_module.lm_head.weight": 71.93384552001953, "gnorm/_forward_module.lm_head.weight": 0.9843456149101257}
{"step": 41943040, "pnorm/_forward_module.model.embeddings.weight": 73.49058532714844, "gnorm/_forward_module.model.embeddings.weight": 0.061607781797647476, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.944855690002441, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0018161119660362601, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 5.433432102203369, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.003969126846641302, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 5.461900234222412, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.003255331888794899, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.286075115203857, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.03514140844345093, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.279037952423096, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.036937784403562546, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.6584036946296692, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.007936905138194561, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00843839906156063, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0024757529608905315, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.098344802856445, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0011970505584031343, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.293915748596191, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03391759470105171, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.34956169128418, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02449009194970131, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.989734649658203, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001276906463317573, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.448599338531494, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0015077836578711867, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.4395928382873535, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0016058466862887144, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.3501667976379395, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.021339863538742065, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.355653285980225, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.036692313849925995, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.6772328019142151, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0033541680313646793, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.009584035724401474, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00019561999943107367, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.04805564880371, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001032502157613635, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.12743854522705, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.01960146054625511, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.297164916992188, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.023524856194853783, "pnorm/_forward_module.model.norm.weight": 16.15458869934082, "gnorm/_forward_module.model.norm.weight": 0.07690682262182236, "pnorm/_forward_module.lm_head.weight": 80.11444091796875, "gnorm/_forward_module.lm_head.weight": 0.9487274289131165}
{"step": 62914560, "pnorm/_forward_module.model.embeddings.weight": 74.88994598388672, "gnorm/_forward_module.model.embeddings.weight": 0.07597663253545761, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.972419738769531, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004672551527619362, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 5.755270957946777, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0049467491917312145, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 5.780837535858154, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.004270249977707863, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.457310676574707, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10007039457559586, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.443675994873047, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14211130142211914, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.7366576790809631, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009451810270547867, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.006736272014677525, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001875121844932437, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.118322372436523, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008479232899844646, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.626164436340332, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2150479406118393, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.601815223693848, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.14681793749332428, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.972100257873535, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008661371655762196, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.617506980895996, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005076989531517029, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.60673189163208, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0074254898354411125, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.43977165222168, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1138111799955368, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.455945014953613, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09058065712451935, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.7070551514625549, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008187072351574898, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.009798632003366947, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00047067314153537154, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.024097442626953, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004171210806816816, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.378179550170898, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1020917072892189, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.497102737426758, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.061179760843515396, "pnorm/_forward_module.model.norm.weight": 16.303817749023438, "gnorm/_forward_module.model.norm.weight": 0.018896114081144333, "pnorm/_forward_module.lm_head.weight": 90.32892608642578, "gnorm/_forward_module.lm_head.weight": 0.19551944732666016}
{"step": 83886080, "pnorm/_forward_module.model.embeddings.weight": 76.44368743896484, "gnorm/_forward_module.model.embeddings.weight": 0.07930849492549896, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.01687240600586, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005513214971870184, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.049816131591797, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006496718619018793, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 6.073756217956543, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006670854985713959, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.6009626388549805, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11010660231113434, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.573078155517578, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13728652894496918, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.8040162324905396, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.015027034096419811, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.012285887263715267, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0029063045512884855, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.12519073486328, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006780239287763834, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.761842727661133, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.16053345799446106, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.72683048248291, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11886771023273468, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.95832633972168, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006680781487375498, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.733181953430176, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010307379066944122, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.701542377471924, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011362992227077484, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.457224369049072, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1043073832988739, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.485989570617676, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09152856469154358, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.7329626679420471, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008318386971950531, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.012177489697933197, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003090408572461456, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.997032165527344, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.007062331773340702, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.46699047088623, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.15480788052082062, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.57370376586914, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07746879011392593, "pnorm/_forward_module.model.norm.weight": 16.426965713500977, "gnorm/_forward_module.model.norm.weight": 0.019787102937698364, "pnorm/_forward_module.lm_head.weight": 98.42568969726562, "gnorm/_forward_module.lm_head.weight": 0.17325283586978912}
{"step": 104857600, "pnorm/_forward_module.model.embeddings.weight": 78.11286926269531, "gnorm/_forward_module.model.embeddings.weight": 0.16937188804149628, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.053125381469727, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009213138371706009, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.295784950256348, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009306230582296848, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 6.325882911682129, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008344834670424461, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.716864109039307, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1718684285879135, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.674732685089111, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.21003493666648865, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.829669713973999, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010988417081534863, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.017017638310790062, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005561288562603295, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.12384605407715, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008976465091109276, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.848369598388672, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.22322280704975128, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.823541641235352, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15843050181865692, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.9498291015625, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010773612186312675, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.81465482711792, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01143778208643198, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.769066333770752, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.019891217350959778, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.470144748687744, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.17998896539211273, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.506825923919678, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.1316508799791336, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.7614491581916809, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019970055669546127, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.016588453203439713, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004582771216519177, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.980132102966309, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00909390114247799, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.523462295532227, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.2083943635225296, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.6212739944458, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09244253486394882, "pnorm/_forward_module.model.norm.weight": 16.54207992553711, "gnorm/_forward_module.model.norm.weight": 0.016870953142642975, "pnorm/_forward_module.lm_head.weight": 104.65542602539062, "gnorm/_forward_module.lm_head.weight": 0.12114791572093964}
{"step": 125829120, "pnorm/_forward_module.model.embeddings.weight": 79.67562866210938, "gnorm/_forward_module.model.embeddings.weight": 0.27076810598373413, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.08155059814453, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007802056148648262, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.5190958976745605, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02886628918349743, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 6.555778980255127, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02700858935713768, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.798874378204346, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.16170728206634521, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.743980407714844, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1717504858970642, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.8584901094436646, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02907814458012581, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.021339694038033485, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002385492902249098, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.12078094482422, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004597479477524757, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.912531852722168, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11478620022535324, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.907936096191406, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07405915856361389, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.948895454406738, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004754727240651846, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.897298812866211, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02250438928604126, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.840423583984375, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.030659666284918785, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.489763259887695, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08445242047309875, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.535558223724365, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06140395253896713, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.7939058542251587, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.04045616090297699, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.023980583995580673, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.004436281975358725, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.969919204711914, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0035182619467377663, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.57705307006836, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07175486534833908, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.666468620300293, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03958219662308693, "pnorm/_forward_module.model.norm.weight": 16.661285400390625, "gnorm/_forward_module.model.norm.weight": 0.032290779054164886, "pnorm/_forward_module.lm_head.weight": 109.74832153320312, "gnorm/_forward_module.lm_head.weight": 0.14223547279834747}
{"step": 146800640, "pnorm/_forward_module.model.embeddings.weight": 81.10012817382812, "gnorm/_forward_module.model.embeddings.weight": 0.3972567021846771, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.10307502746582, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.011317462660372257, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.701213359832764, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.06323665380477905, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 6.745156764984131, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.06785913556814194, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.8558349609375, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.25484606623649597, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.79291296005249, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.28168877959251404, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.894920289516449, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.10865162312984467, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.025242270901799202, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.007006607949733734, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.115859985351562, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007040046155452728, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 13.964837074279785, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.16630573570728302, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 9.97901439666748, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10329962521791458, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.951245307922363, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007102874107658863, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.955022811889648, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.029228556901216507, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.8880157470703125, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.041769176721572876, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.515825271606445, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1492537409067154, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.5728678703308105, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11556188762187958, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.8285028338432312, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.058868274092674255, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.03320124000310898, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00421054195612669, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.9663667678833, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.007122169714421034, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.64005184173584, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.14873653650283813, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.71642780303955, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06751801818609238, "pnorm/_forward_module.model.norm.weight": 16.787813186645508, "gnorm/_forward_module.model.norm.weight": 0.02028297260403633, "pnorm/_forward_module.lm_head.weight": 114.40328216552734, "gnorm/_forward_module.lm_head.weight": 0.16569149494171143}
{"step": 167772160, "pnorm/_forward_module.model.embeddings.weight": 82.40795135498047, "gnorm/_forward_module.model.embeddings.weight": 0.2025238573551178, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.11884880065918, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006921234540641308, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.843315124511719, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03999471291899681, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 6.889676570892334, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.04167145863175392, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.891746997833252, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.157960906624794, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.824216365814209, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.18729394674301147, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.9387774467468262, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.06175748631358147, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0299394391477108, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004272074904292822, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.110490798950195, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005577956326305866, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.008129119873047, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12842847406864166, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.033875465393066, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09589163213968277, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.95638656616211, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006718477699905634, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 5.991936206817627, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02173023670911789, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.924596786499023, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.030541986227035522, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.545361042022705, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.14545053243637085, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.615042686462402, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10669176280498505, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.8737874627113342, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.042894329875707626, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.04450457915663719, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.005045986268669367, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.966714859008789, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.006270585581660271, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.714472770690918, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1320708692073822, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.774035453796387, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06698741763830185, "pnorm/_forward_module.model.norm.weight": 16.91901969909668, "gnorm/_forward_module.model.norm.weight": 0.03498150035738945, "pnorm/_forward_module.lm_head.weight": 118.95142364501953, "gnorm/_forward_module.lm_head.weight": 0.13400687277317047}
{"step": 188743680, "pnorm/_forward_module.model.embeddings.weight": 83.62446594238281, "gnorm/_forward_module.model.embeddings.weight": 0.16319693624973297, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.129310607910156, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005269177723675966, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 6.9655537605285645, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013441052287817001, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.010619640350342, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013712843880057335, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.911289691925049, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1497485637664795, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.842068195343018, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1703694462776184, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.9875470399856567, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010587908327579498, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.03633253648877144, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0012599150650203228, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.10809326171875, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003632865846157074, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.050271034240723, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09012207388877869, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.078251838684082, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06709024310112, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.966767311096191, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004358288366347551, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.024993896484375, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.013733034953474998, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.951837539672852, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.026359710842370987, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.578583717346191, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1106235533952713, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.663088798522949, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09147733449935913, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.9445444345474243, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019813520833849907, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.05606655776500702, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002734360285103321, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.967580795288086, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003997805994004011, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.793850898742676, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07483652234077454, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.824563026428223, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04269333928823471, "pnorm/_forward_module.model.norm.weight": 17.050506591796875, "gnorm/_forward_module.model.norm.weight": 0.024963296949863434, "pnorm/_forward_module.lm_head.weight": 123.4576187133789, "gnorm/_forward_module.lm_head.weight": 0.0917944610118866}
{"step": 209715200, "pnorm/_forward_module.model.embeddings.weight": 84.76009368896484, "gnorm/_forward_module.model.embeddings.weight": 0.21199177205562592, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.13445472717285, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.008684784173965454, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.056536674499512, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.033695612102746964, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.099299430847168, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03227211534976959, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.919482231140137, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.24651245772838593, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.849952220916748, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.29593709111213684, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.025898814201355, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.051366325467824936, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.04175814986228943, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003684332827106118, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.103702545166016, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005320836789906025, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.08340835571289, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.15071050822734833, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.110928535461426, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1516065150499344, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.980895042419434, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011889134533703327, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.048890113830566, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03904469311237335, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.971392631530762, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05655384808778763, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.6173810958862305, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2533377707004547, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.716982841491699, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.17985288798809052, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.0318337678909302, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.05212240293622017, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.06668494641780853, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.005436921026557684, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.971988677978516, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.007158719468861818, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.87979507446289, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.15860435366630554, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.882991790771484, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08661124855279922, "pnorm/_forward_module.model.norm.weight": 17.184024810791016, "gnorm/_forward_module.model.norm.weight": 0.015842411667108536, "pnorm/_forward_module.lm_head.weight": 128.03253173828125, "gnorm/_forward_module.lm_head.weight": 0.10422592610120773}
{"step": 230686720, "pnorm/_forward_module.model.embeddings.weight": 85.82051849365234, "gnorm/_forward_module.model.embeddings.weight": 0.1840544193983078, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.138134002685547, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006066782400012016, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.134839057922363, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.023177703842520714, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.173806667327881, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02293059229850769, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.921664237976074, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.18565498292446136, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.853517055511475, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.21482443809509277, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.0567618608474731, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.024464592337608337, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.047183871269226074, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0026294367853552103, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.098127365112305, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004133800510317087, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.110907554626465, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.10518231242895126, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.134795188903809, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09959756582975388, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 15.997269630432129, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006588327698409557, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.071419715881348, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02714446745812893, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 5.989881992340088, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.04000772908329964, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.657135963439941, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.16074854135513306, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.769173622131348, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.12271121144294739, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.1232587099075317, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.014214995317161083, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.07616313546895981, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014605352189391851, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.97897720336914, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0038810824044048786, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 13.971722602844238, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08428943902254105, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.939470291137695, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05467931926250458, "pnorm/_forward_module.model.norm.weight": 17.3179988861084, "gnorm/_forward_module.model.norm.weight": 0.028649963438510895, "pnorm/_forward_module.lm_head.weight": 132.6510009765625, "gnorm/_forward_module.lm_head.weight": 0.08684412389993668}
{"step": 251658240, "pnorm/_forward_module.model.embeddings.weight": 86.80840301513672, "gnorm/_forward_module.model.embeddings.weight": 0.16536845266819, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.142473220825195, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007211247459053993, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.210788726806641, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01950487121939659, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.244894981384277, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.024354835972189903, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.92273473739624, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1915964037179947, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.855989456176758, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2279568612575531, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.0919400453567505, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01625225879251957, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.052082791924476624, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0016527462285012007, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.092893600463867, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0054925880394876, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.13753604888916, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12882590293884277, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.154690742492676, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11033093184232712, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.01178741455078, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008818302303552628, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.10203218460083, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04394018277525902, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.013166904449463, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05901181325316429, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.690598011016846, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1892859786748886, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.813355922698975, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.1347924768924713, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.2036337852478027, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008893336169421673, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.08392015844583511, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007922466029413044, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.98725414276123, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004315048456192017, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.065461158752441, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.11211855709552765, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 9.995488166809082, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06543305516242981, "pnorm/_forward_module.model.norm.weight": 17.45160484313965, "gnorm/_forward_module.model.norm.weight": 0.021481908857822418, "pnorm/_forward_module.lm_head.weight": 137.2794189453125, "gnorm/_forward_module.lm_head.weight": 0.08165688812732697}
{"step": 272629760, "pnorm/_forward_module.model.embeddings.weight": 87.73100280761719, "gnorm/_forward_module.model.embeddings.weight": 0.24232202768325806, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.145559310913086, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.010309348814189434, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.280605792999268, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.027465231716632843, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.310159206390381, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.027734674513339996, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.920651912689209, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2892310917377472, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.855778217315674, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3249877095222473, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.1241724491119385, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.021279318258166313, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.05778597667813301, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0024189064279198647, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.088748931884766, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008235842920839787, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.165690422058105, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2035948932170868, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.17336654663086, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1687595397233963, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.025407791137695, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010686378926038742, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.143698215484619, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.044652096927165985, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.044376850128174, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07976394146680832, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.718111038208008, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.27623194456100464, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.851855278015137, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.16540488600730896, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.272892951965332, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.020671509206295013, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.09083624929189682, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00290342653170228, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 15.999034881591797, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0050935824401676655, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.164361000061035, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12197272479534149, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.057548522949219, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08286716789007187, "pnorm/_forward_module.model.norm.weight": 17.586463928222656, "gnorm/_forward_module.model.norm.weight": 0.027356814593076706, "pnorm/_forward_module.lm_head.weight": 141.8426513671875, "gnorm/_forward_module.lm_head.weight": 0.08379202336072922}
{"step": 293601280, "pnorm/_forward_module.model.embeddings.weight": 88.5937271118164, "gnorm/_forward_module.model.embeddings.weight": 0.23741649091243744, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.145029067993164, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.008575675077736378, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.3357462882995605, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03034127689898014, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.363217830657959, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026033489033579826, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.914319038391113, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.23114928603172302, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.851653575897217, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.26766741275787354, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.1541799306869507, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03323756903409958, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.06321863830089569, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002141643315553665, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.087194442749023, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0049826595932245255, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.195695877075195, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1433328092098236, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.191606521606445, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.12941396236419678, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.041637420654297, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010541539639234543, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.202450275421143, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04479234665632248, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.087597846984863, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07001674920320511, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.74199104309082, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.25496232509613037, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.888543128967285, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.15379978716373444, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.3386846780776978, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.026703402400016785, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.09742366522550583, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002520937006920576, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.015594482421875, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00707329111173749, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.271105766296387, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1697213500738144, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.123900413513184, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1117330938577652, "pnorm/_forward_module.model.norm.weight": 17.723388671875, "gnorm/_forward_module.model.norm.weight": 0.01274515874683857, "pnorm/_forward_module.lm_head.weight": 146.3300323486328, "gnorm/_forward_module.lm_head.weight": 0.0890781506896019}
{"step": 314572800, "pnorm/_forward_module.model.embeddings.weight": 89.4027328491211, "gnorm/_forward_module.model.embeddings.weight": 0.25835156440734863, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.147722244262695, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.01049479003995657, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.391076564788818, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.024725673720240593, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.416975498199463, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.027991456910967827, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.912836074829102, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.33850523829460144, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.851629734039307, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3602639138698578, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.1793168783187866, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.028827449306845665, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.06770110875368118, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002850713673979044, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.083271026611328, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00572655163705349, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.21980094909668, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.17735016345977783, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.204890251159668, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1820315718650818, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.056869506835938, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.013876463286578655, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.265024662017822, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03723054379224777, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.132700443267822, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06430535763502121, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.761787414550781, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.3246208727359772, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.918417930603027, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.17749464511871338, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.392572045326233, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015143428929150105, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.10286345332860947, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0017571203643456101, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.03017807006836, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005847745109349489, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.36701488494873, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.13090787827968597, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.178596496582031, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09486467391252518, "pnorm/_forward_module.model.norm.weight": 17.855567932128906, "gnorm/_forward_module.model.norm.weight": 0.023925915360450745, "pnorm/_forward_module.lm_head.weight": 150.6578369140625, "gnorm/_forward_module.lm_head.weight": 0.10077065974473953}
{"step": 335544320, "pnorm/_forward_module.model.embeddings.weight": 90.16848754882812, "gnorm/_forward_module.model.embeddings.weight": 0.22858496010303497, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.14771842956543, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007369040045887232, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.438493728637695, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019021548330783844, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.462460041046143, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02120693400502205, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.908205509185791, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.22637203335762024, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.848961353302002, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2266196757555008, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.20077383518219, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.025637401267886162, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.07301612198352814, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004814534913748503, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.08165168762207, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0036457194946706295, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.245586395263672, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.10910625010728836, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.218076705932617, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0956534594297409, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.07406997680664, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006100333295762539, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.333832740783691, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.023793961852788925, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.182105541229248, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.040050458163022995, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.78040885925293, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.15824654698371887, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.947714328765869, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10512601584196091, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.448504090309143, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01833057776093483, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.10900429636240005, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002136590890586376, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.04541015625, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003999635577201843, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.459443092346191, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09439331293106079, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.229787826538086, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05881432443857193, "pnorm/_forward_module.model.norm.weight": 17.988439559936523, "gnorm/_forward_module.model.norm.weight": 0.017126301303505898, "pnorm/_forward_module.lm_head.weight": 154.8427276611328, "gnorm/_forward_module.lm_head.weight": 0.06683126837015152}
{"step": 356515840, "pnorm/_forward_module.model.embeddings.weight": 90.89396667480469, "gnorm/_forward_module.model.embeddings.weight": 0.14121636748313904, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.14615249633789, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006318354979157448, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.479001045227051, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.018388787284493446, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.5011372566223145, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.019070573151111603, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.901999473571777, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1914856731891632, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.84480094909668, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2117525041103363, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2185012102127075, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.016329944133758545, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.07806332409381866, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001482911640778184, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.079240798950195, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0033970747608691454, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.268656730651855, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.10296657681465149, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.228809356689453, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09591896831989288, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.09156036376953, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006628477014601231, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.41030216217041, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02173665538430214, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.23518180847168, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.03760357201099396, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.798464775085449, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.15050970017910004, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 5.976438045501709, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09879400581121445, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.4820362329483032, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007722839713096619, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11268781125545502, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005766506073996425, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.062759399414062, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0028284413274377584, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.551603317260742, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07456057518720627, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.282119750976562, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04918181523680687, "pnorm/_forward_module.model.norm.weight": 18.122987747192383, "gnorm/_forward_module.model.norm.weight": 0.01923101767897606, "pnorm/_forward_module.lm_head.weight": 158.89418029785156, "gnorm/_forward_module.lm_head.weight": 0.0542321652173996}
{"step": 377487360, "pnorm/_forward_module.model.embeddings.weight": 91.5799789428711, "gnorm/_forward_module.model.embeddings.weight": 0.23439696431159973, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.14375877380371, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009188073687255383, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.512201309204102, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.025147074833512306, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.532954216003418, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.028735561296343803, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.89562463760376, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.28092071413993835, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.840109348297119, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.31297871470451355, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.234147310256958, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03071645088493824, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.08346227556467056, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0014316203305497766, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.078081130981445, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005763879511505365, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.292433738708496, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.15703894197940826, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.239374160766602, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15120942890644073, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.108749389648438, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011317712254822254, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.4871721267700195, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03530982881784439, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.287702560424805, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0693540871143341, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.814743995666504, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2719857394695282, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.002852916717529, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.17057110369205475, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.5213823318481445, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.018462006002664566, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11707223951816559, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0016707928152754903, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.08072853088379, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.006646385416388512, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.639954566955566, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.17213216423988342, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.332590103149414, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.10172130167484283, "pnorm/_forward_module.model.norm.weight": 18.256534576416016, "gnorm/_forward_module.model.norm.weight": 0.024371275678277016, "pnorm/_forward_module.lm_head.weight": 162.77151489257812, "gnorm/_forward_module.lm_head.weight": 0.09735913574695587}
{"step": 398458880, "pnorm/_forward_module.model.embeddings.weight": 92.2322769165039, "gnorm/_forward_module.model.embeddings.weight": 0.24870066344738007, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.142486572265625, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009367205202579498, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.544608116149902, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0271880142390728, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.565149307250977, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.025796176865696907, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.890977382659912, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.303481787443161, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.837011814117432, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3376403748989105, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2450917959213257, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03664330393075943, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.08760926127433777, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001996302045881748, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.075708389282227, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005454336758702993, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.311745643615723, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.16796565055847168, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.246405601501465, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15342512726783752, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.12604522705078, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011552946642041206, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.563150405883789, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.033611755818128586, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.337704658508301, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06691266596317291, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.8299784660339355, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.25780341029167175, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.02716588973999, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.14455090463161469, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.5584321022033691, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00997294019907713, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12108348309993744, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010004936484619975, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.094831466674805, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004815593361854553, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.715835571289062, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12684142589569092, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.372509956359863, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08536971360445023, "pnorm/_forward_module.model.norm.weight": 18.387067794799805, "gnorm/_forward_module.model.norm.weight": 0.014719798229634762, "pnorm/_forward_module.lm_head.weight": 166.4724578857422, "gnorm/_forward_module.lm_head.weight": 0.060370054095983505}
{"step": 419430400, "pnorm/_forward_module.model.embeddings.weight": 92.85285186767578, "gnorm/_forward_module.model.embeddings.weight": 0.19071532785892487, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.13833236694336, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006551853846758604, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.574807167053223, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03646489977836609, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.594360828399658, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03265569731593132, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.882615566253662, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.20909512042999268, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.830440044403076, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.22340095043182373, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2534639835357666, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04857994243502617, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.09184589236974716, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0036323009990155697, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.0740909576416, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003265694947913289, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.330657958984375, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1142619177699089, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.253521919250488, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10754545778036118, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.145389556884766, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006775465793907642, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.639438629150391, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.023652901872992516, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.387392997741699, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.040627703070640564, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.846147060394287, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.17154112458229065, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.052713871002197, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10318370163440704, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.5926802158355713, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008532107807695866, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12432464957237244, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007145332056097686, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.112295150756836, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0034234696067869663, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.793949127197266, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09382154792547226, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.415181159973145, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06875316798686981, "pnorm/_forward_module.model.norm.weight": 18.51799201965332, "gnorm/_forward_module.model.norm.weight": 0.017061227932572365, "pnorm/_forward_module.lm_head.weight": 169.9793243408203, "gnorm/_forward_module.lm_head.weight": 0.051629096269607544}
{"step": 440401920, "pnorm/_forward_module.model.embeddings.weight": 93.44153594970703, "gnorm/_forward_module.model.embeddings.weight": 0.20235177874565125, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.13605308532715, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.008770341984927654, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.6050615310668945, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.024122925475239754, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.622251987457275, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026749055832624435, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.876957893371582, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.29352521896362305, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.825939655303955, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.31368008255958557, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2623562812805176, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.027939561754465103, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.09574870020151138, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0027158609591424465, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.071765899658203, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004416641313582659, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.348091125488281, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.15586477518081665, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.260726928710938, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.16134996712207794, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.165922164916992, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01028165128082037, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.725041389465332, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03005221113562584, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.4409894943237305, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05953496694564819, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.860496997833252, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.27338314056396484, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.075718402862549, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.1508130133152008, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.6332329511642456, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015237169340252876, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12775494158267975, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0019553194288164377, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.127439498901367, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005100429989397526, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.864448547363281, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1321902871131897, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.453117370605469, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08750463277101517, "pnorm/_forward_module.model.norm.weight": 18.64663314819336, "gnorm/_forward_module.model.norm.weight": 0.016924038529396057, "pnorm/_forward_module.lm_head.weight": 173.29327392578125, "gnorm/_forward_module.lm_head.weight": 0.0799047201871872}
{"step": 461373440, "pnorm/_forward_module.model.embeddings.weight": 94.00251007080078, "gnorm/_forward_module.model.embeddings.weight": 0.18276256322860718, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.131418228149414, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006807558238506317, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.629365921020508, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.027026822790503502, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.645265102386475, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02589160017669201, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.86925745010376, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2216644287109375, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.819730758666992, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.24460923671722412, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2663227319717407, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04569048061966896, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.099234439432621, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0036534531973302364, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.069828033447266, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003797166980803013, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.364992141723633, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12680019438266754, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.268211364746094, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10956178605556488, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.188566207885742, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007756480481475592, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.8157525062561035, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.022618860006332397, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.496270179748535, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.047540925443172455, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.875314712524414, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1739145815372467, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.0987701416015625, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10531287640333176, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.6767663955688477, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011981161311268806, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13121762871742249, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013962923549115658, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.14323616027832, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0035912555176764727, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.932988166809082, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09816188365221024, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.490575790405273, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06795324385166168, "pnorm/_forward_module.model.norm.weight": 18.774126052856445, "gnorm/_forward_module.model.norm.weight": 0.017625225707888603, "pnorm/_forward_module.lm_head.weight": 176.4181365966797, "gnorm/_forward_module.lm_head.weight": 0.060488589107990265}
{"step": 482344960, "pnorm/_forward_module.model.embeddings.weight": 94.53687286376953, "gnorm/_forward_module.model.embeddings.weight": 0.23293745517730713, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.12751007080078, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00979567039757967, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.652945518493652, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02339017391204834, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.6681342124938965, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026332011446356773, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.862423896789551, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.33439433574676514, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.814183712005615, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3625766336917877, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2706624269485474, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.029478365555405617, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1021341010928154, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0034781433641910553, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.06745147705078, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0056021190248429775, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.380741119384766, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.18623214960098267, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.27484130859375, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1691756397485733, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.212291717529297, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011105773039162159, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 6.91453218460083, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.035617899149656296, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.5556511878967285, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07290422171354294, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.889091491699219, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.30362197756767273, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.120240688323975, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.16175997257232666, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.7157282829284668, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01843256689608097, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13419915735721588, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0028629445005208254, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.15849494934082, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.006687602493911982, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 14.9977388381958, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.16764725744724274, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.527640342712402, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.12748929858207703, "pnorm/_forward_module.model.norm.weight": 18.901926040649414, "gnorm/_forward_module.model.norm.weight": 0.020733606070280075, "pnorm/_forward_module.lm_head.weight": 179.38880920410156, "gnorm/_forward_module.lm_head.weight": 0.08528453856706619}
{"step": 503316480, "pnorm/_forward_module.model.embeddings.weight": 95.04638671875, "gnorm/_forward_module.model.embeddings.weight": 0.15532135963439941, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.122041702270508, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006966985296458006, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.672325134277344, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02717134915292263, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.686349868774414, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02666059508919716, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.85491418838501, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.23515468835830688, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.807901859283447, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2511366605758667, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2733782529830933, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04113561660051346, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.10550642013549805, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0022787144407629967, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.065458297729492, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003960816189646721, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.395707130432129, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.13061442971229553, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.28096866607666, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11754515767097473, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.23788070678711, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007954106666147709, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.015314102172852, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.025775950402021408, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.61585807800293, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.047980569303035736, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.903748512268066, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1957806497812271, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.142185688018799, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11624333262443542, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.7581123113632202, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.014558468945324421, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13758933544158936, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001041799783706665, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.173145294189453, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004071169067174196, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.0595121383667, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.10523778945207596, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.559745788574219, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06728274375200272, "pnorm/_forward_module.model.norm.weight": 19.026662826538086, "gnorm/_forward_module.model.norm.weight": 0.013171317987143993, "pnorm/_forward_module.lm_head.weight": 182.18899536132812, "gnorm/_forward_module.lm_head.weight": 0.05917133390903473}
{"step": 524288000, "pnorm/_forward_module.model.embeddings.weight": 95.53457641601562, "gnorm/_forward_module.model.embeddings.weight": 0.22034896910190582, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.116127014160156, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.008337992243468761, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.689245700836182, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03099421039223671, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.702913761138916, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03153468668460846, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.847163677215576, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.27972444891929626, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.801424026489258, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.29661962389945984, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.276160717010498, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03266516327857971, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1089356318116188, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.005590545944869518, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.06358528137207, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00514281215146184, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.4102201461792, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1522119790315628, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.286970138549805, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13961637020111084, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.26641845703125, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009095106273889542, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.124856472015381, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.028037674725055695, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.680570125579834, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06301098316907883, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.917715549468994, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.21195439994335175, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.164060115814209, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.12783999741077423, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.8040308952331543, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01214833278208971, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14120244979858398, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0016094680177047849, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.186891555786133, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005583187565207481, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.116930961608887, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1352151781320572, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.589425086975098, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09546215087175369, "pnorm/_forward_module.model.norm.weight": 19.14858627319336, "gnorm/_forward_module.model.norm.weight": 0.017785709351301193, "pnorm/_forward_module.lm_head.weight": 184.8389434814453, "gnorm/_forward_module.lm_head.weight": 0.07949421554803848}
{"step": 545259520, "pnorm/_forward_module.model.embeddings.weight": 96.00264739990234, "gnorm/_forward_module.model.embeddings.weight": 0.20117095112800598, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.10902976989746, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0071280295960605145, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.7032856941223145, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020687788724899292, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.715653419494629, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.021103909239172935, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.838868618011475, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.23939353227615356, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.793953895568848, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.24034568667411804, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2780157327651978, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.023903880268335342, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.11243792623281479, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0019787985365837812, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.060651779174805, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0035628043115139008, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.422528266906738, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12547312676906586, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.291552543640137, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1071537435054779, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.298749923706055, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007662015035748482, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.2497687339782715, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02259899117052555, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.754214286804199, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.04498670995235443, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.932610988616943, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.17681235074996948, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.187340259552002, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09144915640354156, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.8466691970825195, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01845640130341053, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1444997489452362, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0028923251666128635, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.200286865234375, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0033834974747151136, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.170270919799805, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08719339221715927, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.617233276367188, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06491568684577942, "pnorm/_forward_module.model.norm.weight": 19.269508361816406, "gnorm/_forward_module.model.norm.weight": 0.01581449992954731, "pnorm/_forward_module.lm_head.weight": 187.33938598632812, "gnorm/_forward_module.lm_head.weight": 0.05652138963341713}
{"step": 566231040, "pnorm/_forward_module.model.embeddings.weight": 96.45067596435547, "gnorm/_forward_module.model.embeddings.weight": 0.22004501521587372, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.103315353393555, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009375354275107384, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.717862606048584, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.025473136454820633, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.729610443115234, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02675601840019226, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.833230018615723, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3289332389831543, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.789061069488525, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3475860059261322, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.278484582901001, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02552069164812565, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.11501074582338333, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003064599819481373, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.05790901184082, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006465013138949871, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.434786796569824, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.18133848905563354, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.296317100524902, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.16377007961273193, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.331037521362305, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009125569835305214, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.376690864562988, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03462328016757965, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.830111503601074, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06784458458423615, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.947010517120361, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2766604721546173, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.20965576171875, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.15607833862304688, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.887609601020813, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.016413796693086624, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14778995513916016, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0019539541099220514, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.211517333984375, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.007291567046195269, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.217272758483887, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.17318810522556305, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.641464233398438, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.11966361105442047, "pnorm/_forward_module.model.norm.weight": 19.38921546936035, "gnorm/_forward_module.model.norm.weight": 0.017861122265458107, "pnorm/_forward_module.lm_head.weight": 189.70346069335938, "gnorm/_forward_module.lm_head.weight": 0.08456363528966904}
{"step": 587202560, "pnorm/_forward_module.model.embeddings.weight": 96.88018798828125, "gnorm/_forward_module.model.embeddings.weight": 0.1735384315252304, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.097389221191406, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007479378953576088, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.726406097412109, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.022002913057804108, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.738077640533447, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.022471435368061066, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.828475475311279, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2592656910419464, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.784884929656982, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2785628139972687, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.278551459312439, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02430613711476326, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.11785029619932175, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002355094300583005, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.053321838378906, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0037969087716192007, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.444052696228027, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.14658933877944946, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.299330711364746, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.12882286310195923, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.363475799560547, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00909586250782013, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.5015716552734375, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.023639777675271034, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.903359889984131, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05382176861166954, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.961294651031494, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.22143489122390747, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.2315993309021, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11884113401174545, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.923274040222168, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01303075347095728, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15039731562137604, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018774199998006225, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.220705032348633, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0044664801098406315, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.259578704833984, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12267091870307922, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.66280746459961, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09408437460660934, "pnorm/_forward_module.model.norm.weight": 19.506816864013672, "gnorm/_forward_module.model.norm.weight": 0.008999792858958244, "pnorm/_forward_module.lm_head.weight": 191.95277404785156, "gnorm/_forward_module.lm_head.weight": 0.05207791551947594}
{"step": 608174080, "pnorm/_forward_module.model.embeddings.weight": 97.29241180419922, "gnorm/_forward_module.model.embeddings.weight": 0.20382501184940338, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.090051651000977, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007904560305178165, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.730472087860107, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.027826420962810516, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.74221134185791, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03123570792376995, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.822511672973633, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.306417316198349, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.7796220779418945, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.335592657327652, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2775789499282837, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.023581864312291145, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.12047639489173889, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0016218681121245027, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.05027198791504, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005104148294776678, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.454352378845215, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1775524914264679, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.303309440612793, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1624232977628708, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.39799690246582, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010691205970942974, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.634333610534668, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.029091862961649895, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 6.980170249938965, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06323990225791931, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.974838733673096, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.25570929050445557, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.252712249755859, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.13154585659503937, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.9607007503509521, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.018456030637025833, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15323762595653534, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00257876212708652, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.23038101196289, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004449437838047743, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.300716400146484, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12886248528957367, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.684063911437988, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09636989235877991, "pnorm/_forward_module.model.norm.weight": 19.6219425201416, "gnorm/_forward_module.model.norm.weight": 0.011629029177129269, "pnorm/_forward_module.lm_head.weight": 194.076904296875, "gnorm/_forward_module.lm_head.weight": 0.0853302851319313}
{"step": 629145600, "pnorm/_forward_module.model.embeddings.weight": 97.68669891357422, "gnorm/_forward_module.model.embeddings.weight": 0.1636805683374405, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.081573486328125, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007223108317703009, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.732177734375, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.026001984253525734, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.744466781616211, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.027224862948060036, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.816336154937744, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2571079134941101, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.774085998535156, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.27747640013694763, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2768009901046753, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.028504854068160057, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.12301230430603027, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002434689551591873, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.04452133178711, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004219732712954283, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.460796356201172, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1512661576271057, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.30492877960205, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13173426687717438, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.435611724853516, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008819940499961376, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.773662567138672, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.024418124929070473, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.059475898742676, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05566051974892616, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 5.9901018142700195, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.20991916954517365, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.2763166427612305, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11321206390857697, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.0000133514404297, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019851189106702805, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1561778336763382, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002848095027729869, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.23982048034668, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004457150120288134, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.340049743652344, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.11663961410522461, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.703718185424805, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07951927930116653, "pnorm/_forward_module.model.norm.weight": 19.735898971557617, "gnorm/_forward_module.model.norm.weight": 0.010668852366507053, "pnorm/_forward_module.lm_head.weight": 196.0958251953125, "gnorm/_forward_module.lm_head.weight": 0.07121716439723969}
{"step": 650117120, "pnorm/_forward_module.model.embeddings.weight": 98.06478881835938, "gnorm/_forward_module.model.embeddings.weight": 0.15938271582126617, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.07219123840332, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006006039213389158, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.731297492980957, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02748878486454487, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.744622707366943, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026533981785178185, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.8102545738220215, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.20619338750839233, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.768701553344727, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.22212035953998566, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2742520570755005, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.034607067704200745, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.12589605152606964, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003589056199416518, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.038936614990234, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003471721662208438, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.466378211975098, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11965934187173843, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.306475639343262, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09877663105726242, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.475366592407227, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006739410571753979, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 7.9197893142700195, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.021391453221440315, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.140645980834961, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05054205656051636, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.005711555480957, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.14548586308956146, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.300064563751221, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08618379384279251, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.037837505340576, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013110117986798286, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1591188907623291, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001963021932169795, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.248573303222656, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003548850305378437, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.376485824584961, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.093282550573349, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.721556663513184, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07089327275753021, "pnorm/_forward_module.model.norm.weight": 19.848438262939453, "gnorm/_forward_module.model.norm.weight": 0.012325823307037354, "pnorm/_forward_module.lm_head.weight": 198.01234436035156, "gnorm/_forward_module.lm_head.weight": 0.050430234521627426}
{"step": 671088640, "pnorm/_forward_module.model.embeddings.weight": 98.42638397216797, "gnorm/_forward_module.model.embeddings.weight": 0.2023174911737442, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.06395721435547, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009707758203148842, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.729985237121582, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.029261939227581024, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.743839740753174, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02807682193815708, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.806092262268066, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3390304744243622, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.76533317565918, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.35863423347473145, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2732795476913452, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03094956837594509, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1287548989057541, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0021958923898637295, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.031354904174805, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006607855204492807, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.468613624572754, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1986837089061737, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.30615520477295, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1711643934249878, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.51612091064453, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.012049910612404346, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.071556091308594, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02388971857726574, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.224215984344482, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06019391119480133, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.020712375640869, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2963774502277374, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.3237504959106445, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.12947744131088257, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.0721936225891113, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013252650387585163, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16182689368724823, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0019557576160877943, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.25555419921875, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003794329008087516, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.408302307128906, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.10745280236005783, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.736898422241211, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07972017675638199, "pnorm/_forward_module.model.norm.weight": 19.959779739379883, "gnorm/_forward_module.model.norm.weight": 0.01226205937564373, "pnorm/_forward_module.lm_head.weight": 199.82618713378906, "gnorm/_forward_module.lm_head.weight": 0.056313905864953995}
{"step": 692060160, "pnorm/_forward_module.model.embeddings.weight": 98.77183532714844, "gnorm/_forward_module.model.embeddings.weight": 0.20573918521404266, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.05557632446289, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007639171089977026, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.727658748626709, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02827189303934574, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.742532730102539, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03340359032154083, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.8024187088012695, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.27409180998802185, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.762091636657715, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3130691945552826, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2716904878616333, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0304715633392334, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.13165368139743805, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0041915299370884895, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.024314880371094, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0062354994006454945, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.470952987670898, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.18471813201904297, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.306315422058105, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.14491578936576843, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.55607795715332, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010120926424860954, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.225302696228027, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03127526864409447, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.308112144470215, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07668560743331909, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.035125255584717, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.22492751479148865, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.347119331359863, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.12363525480031967, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1043412685394287, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0241320189088583, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16428865492343903, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002711901906877756, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.26156997680664, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.006169233471155167, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.43750286102295, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.14955765008926392, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.751347541809082, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.10657127946615219, "pnorm/_forward_module.model.norm.weight": 20.069307327270508, "gnorm/_forward_module.model.norm.weight": 0.014878339134156704, "pnorm/_forward_module.lm_head.weight": 201.55836486816406, "gnorm/_forward_module.lm_head.weight": 0.08472712337970734}
{"step": 713031680, "pnorm/_forward_module.model.embeddings.weight": 99.1022720336914, "gnorm/_forward_module.model.embeddings.weight": 0.22916169464588165, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.04576301574707, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.010042784735560417, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.722154140472412, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.041266195476055145, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.7379021644592285, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.04137364402413368, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.797547817230225, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3477133810520172, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.757993221282959, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4053873121738434, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2712900638580322, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04868757352232933, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.13438837230205536, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003433995181694627, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.01668357849121, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007419743575155735, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.471571922302246, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.22989535331726074, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.305148124694824, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.18718764185905457, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.597389221191406, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011514841578900814, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.378793716430664, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.037647731602191925, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.390815258026123, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.10081840306520462, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.050039768218994, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.3397084176540375, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.371720790863037, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.18449492752552032, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1350040435791016, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019360367208719254, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16659072041511536, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018619300099089742, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.268321990966797, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.010180658660829067, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.466082572937012, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.24584978818893433, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.766352653503418, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.17454735934734344, "pnorm/_forward_module.model.norm.weight": 20.177553176879883, "gnorm/_forward_module.model.norm.weight": 0.018703162670135498, "pnorm/_forward_module.lm_head.weight": 203.2186737060547, "gnorm/_forward_module.lm_head.weight": 0.11267819255590439}
{"step": 734003200, "pnorm/_forward_module.model.embeddings.weight": 99.41828155517578, "gnorm/_forward_module.model.embeddings.weight": 0.16045638918876648, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.03580665588379, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006328540854156017, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.713886737823486, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.022718990221619606, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.730217933654785, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02344977669417858, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.793262958526611, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.26672127842903137, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.754383087158203, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.30776122212409973, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2701600790023804, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.027197683230042458, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.13735666871070862, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0017019683727994561, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 16.00797462463379, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005435958504676819, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.469038009643555, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.17133906483650208, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.302346229553223, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13347741961479187, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.640018463134766, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00862868782132864, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.534324645996094, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02631223015487194, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.474114894866943, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06729573011398315, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.065020561218262, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2130332589149475, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.396427631378174, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11398731917142868, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1629278659820557, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01139635406434536, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16882263123989105, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0012246110709384084, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.273733139038086, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004698020406067371, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.491894721984863, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12458794564008713, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.77895736694336, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09849343448877335, "pnorm/_forward_module.model.norm.weight": 20.28345489501953, "gnorm/_forward_module.model.norm.weight": 0.014805878512561321, "pnorm/_forward_module.lm_head.weight": 204.81576538085938, "gnorm/_forward_module.lm_head.weight": 0.06694518774747849}
{"step": 754974720, "pnorm/_forward_module.model.embeddings.weight": 99.71966552734375, "gnorm/_forward_module.model.embeddings.weight": 0.15029793977737427, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.025001525878906, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006898676976561546, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.7045488357543945, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02405928261578083, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.721875190734863, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02591676451265812, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.788855075836182, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.24748513102531433, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.751063346862793, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2838912904262543, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.269788384437561, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.025977246463298798, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.14054937660694122, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0035993424244225025, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.999175071716309, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0045345136895775795, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.466290473937988, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.15724687278270721, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.2999906539917, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1246451810002327, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.684524536132812, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008167837746441364, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.692224502563477, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.021852541714906693, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.55929708480835, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05597616732120514, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.080973148345947, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.18784654140472412, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.4225172996521, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09960131347179413, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.191275119781494, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.017015784978866577, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1711260825395584, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0026595895178616047, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.277015686035156, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003529248759150505, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.512235641479492, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09780817478895187, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.789041519165039, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07379096001386642, "pnorm/_forward_module.model.norm.weight": 20.387346267700195, "gnorm/_forward_module.model.norm.weight": 0.014528797939419746, "pnorm/_forward_module.lm_head.weight": 206.3470458984375, "gnorm/_forward_module.lm_head.weight": 0.046683263033628464}
{"step": 775946240, "pnorm/_forward_module.model.embeddings.weight": 100.00704956054688, "gnorm/_forward_module.model.embeddings.weight": 0.2941301465034485, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.014699935913086, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.011541558429598808, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.695476531982422, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03180893138051033, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.7142415046691895, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03302828222513199, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.785155296325684, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.45532307028770447, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.748058795928955, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.5084837675094604, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2693874835968018, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.029900360852479935, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.14333626627922058, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0029146773740649223, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.988802909851074, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008055591024458408, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.461150169372559, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.278753399848938, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.29558277130127, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.23493948578834534, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.728219985961914, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.017841152846813202, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.846673965454102, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.036290764808654785, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.64326286315918, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.11306578665971756, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.096703052520752, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.44159573316574097, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.448803901672363, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.173120379447937, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2153208255767822, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009259268641471863, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1729714721441269, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005688453675247729, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.28045654296875, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0070902579464018345, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.531828880310059, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.19970548152923584, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.799120903015137, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.15621861815452576, "pnorm/_forward_module.model.norm.weight": 20.489822387695312, "gnorm/_forward_module.model.norm.weight": 0.008953014388680458, "pnorm/_forward_module.lm_head.weight": 207.82281494140625, "gnorm/_forward_module.lm_head.weight": 0.08722332864999771}
{"step": 796917760, "pnorm/_forward_module.model.embeddings.weight": 100.27958679199219, "gnorm/_forward_module.model.embeddings.weight": 0.1832289844751358, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 16.005470275878906, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0069400048814713955, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.685826301574707, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03822636604309082, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.706052303314209, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.033523332327604294, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.782008647918701, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2706996500492096, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.745550632476807, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.33682262897491455, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.268787145614624, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.05000722035765648, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.14555391669273376, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0057035330682992935, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.978593826293945, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006318510975688696, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.455473899841309, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2062811255455017, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.291293144226074, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15104669332504272, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.768680572509766, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009643254801630974, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 8.991643905639648, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.028506657108664513, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.719318389892578, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07179594039916992, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.110976219177246, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2041086107492447, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.473086357116699, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10774476826190948, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.241485357284546, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00873855222016573, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.17500139772891998, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007690155762247741, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.283525466918945, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005600798409432173, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.550729751586914, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12909932434558868, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.808244705200195, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1004580557346344, "pnorm/_forward_module.model.norm.weight": 20.588937759399414, "gnorm/_forward_module.model.norm.weight": 0.009729093872010708, "pnorm/_forward_module.lm_head.weight": 209.23521423339844, "gnorm/_forward_module.lm_head.weight": 0.05669531598687172}
{"step": 817889280, "pnorm/_forward_module.model.embeddings.weight": 100.53945922851562, "gnorm/_forward_module.model.embeddings.weight": 0.2048853039741516, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.994409561157227, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009433403611183167, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.67338752746582, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.034393519163131714, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.694585800170898, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03784063085913658, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.777283191680908, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3501676321029663, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.741778373718262, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.44901904463768005, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2697489261627197, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04371098056435585, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.14827734231948853, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.006499612703919411, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.968451499938965, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008888251148164272, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.448187828063965, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2834368944168091, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.286165237426758, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.2052830010652542, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.809907913208008, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.014295912347733974, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.133918762207031, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04031810536980629, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.7953877449035645, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1146523505449295, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.126290321350098, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.33595117926597595, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.499974250793457, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.15768085420131683, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.259138345718384, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01477715466171503, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.17634497582912445, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0011085477890446782, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.287715911865234, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.008569443598389626, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.570694923400879, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.2262781411409378, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.81857681274414, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1694159209728241, "pnorm/_forward_module.model.norm.weight": 20.688167572021484, "gnorm/_forward_module.model.norm.weight": 0.007088753394782543, "pnorm/_forward_module.lm_head.weight": 210.61212158203125, "gnorm/_forward_module.lm_head.weight": 0.08050274103879929}
{"step": 838860800, "pnorm/_forward_module.model.embeddings.weight": 100.78669738769531, "gnorm/_forward_module.model.embeddings.weight": 0.1814885139465332, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.98440933227539, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007136930711567402, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.662417888641357, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03654519468545914, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.685204029083252, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.032343778759241104, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.773070335388184, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.26074478030204773, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.738387107849121, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.31438031792640686, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2728660106658936, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03681902587413788, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1507289558649063, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0008591669029556215, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.958623886108398, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0061768339946866035, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.440506935119629, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.18882790207862854, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.280240058898926, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.14375153183937073, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.848556518554688, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009691792540252209, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.26003360748291, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02757353149354458, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.864212989807129, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06520722061395645, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.1426472663879395, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2088884860277176, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.528327941894531, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09573116898536682, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.273344039916992, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012824106961488724, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.17733317613601685, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00122366554569453, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.28965950012207, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0032275246921926737, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.585335731506348, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09093200415372849, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.825887680053711, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06821323931217194, "pnorm/_forward_module.model.norm.weight": 20.784013748168945, "gnorm/_forward_module.model.norm.weight": 0.010386320762336254, "pnorm/_forward_module.lm_head.weight": 211.93174743652344, "gnorm/_forward_module.lm_head.weight": 0.0450613759458065}
{"step": 859832320, "pnorm/_forward_module.model.embeddings.weight": 101.0207748413086, "gnorm/_forward_module.model.embeddings.weight": 0.18346385657787323, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.974037170410156, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007925605401396751, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.650774002075195, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.029444964602589607, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.67523717880249, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03195954114198685, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.768524169921875, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3616202771663666, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.73456335067749, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.48957133293151855, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2762458324432373, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03917951136827469, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.15295782685279846, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004500327631831169, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.948342323303223, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.011855985037982464, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.431108474731445, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.3272835612297058, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.273822784423828, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.22896456718444824, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.886762619018555, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.012894567102193832, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.379981994628906, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04459884762763977, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.930870056152344, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.11406629532575607, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.158458709716797, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2992749810218811, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.5570197105407715, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.15945610404014587, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2892327308654785, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.016015561297535896, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.17842204868793488, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0021379359532147646, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.292537689208984, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00965035893023014, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.601076126098633, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.23617495596408844, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.83443832397461, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.17591136693954468, "pnorm/_forward_module.model.norm.weight": 20.879352569580078, "gnorm/_forward_module.model.norm.weight": 0.017016399651765823, "pnorm/_forward_module.lm_head.weight": 213.21156311035156, "gnorm/_forward_module.lm_head.weight": 0.0802975669503212}
{"step": 880803840, "pnorm/_forward_module.model.embeddings.weight": 101.24177551269531, "gnorm/_forward_module.model.embeddings.weight": 0.16789402067661285, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.964214324951172, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0070615303702652454, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.640183448791504, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.026845300570130348, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.66603422164917, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026619650423526764, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.763975143432617, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.25907203555107117, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.730916976928711, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.32100245356559753, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2792272567749023, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.023407800123095512, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.15512621402740479, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002280850661918521, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.939168930053711, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006012500263750553, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.422830581665039, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.20594081282615662, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.26800537109375, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15959183871746063, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.922321319580078, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010323814116418362, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.49052906036377, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.026982789859175682, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 7.991828918457031, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06679588556289673, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.173741817474365, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.20969340205192566, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.584741592407227, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09153863787651062, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3061506748199463, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01397742424160242, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.17956143617630005, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0021234408486634493, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.294673919677734, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0033097751438617706, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.614846229553223, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09369784593582153, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.8408784866333, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06982149928808212, "pnorm/_forward_module.model.norm.weight": 20.97102165222168, "gnorm/_forward_module.model.norm.weight": 0.010389507748186588, "pnorm/_forward_module.lm_head.weight": 214.4483642578125, "gnorm/_forward_module.lm_head.weight": 0.0400870181620121}
{"step": 901775360, "pnorm/_forward_module.model.embeddings.weight": 101.44911193847656, "gnorm/_forward_module.model.embeddings.weight": 0.20068015158176422, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.955327987670898, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.009524423629045486, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.629510402679443, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0245619248598814, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.6568474769592285, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02525559812784195, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.759893894195557, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3585737943649292, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.727430820465088, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4296553134918213, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2833362817764282, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.025101590901613235, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.15728673338890076, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0018407750176265836, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.93018627166748, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.009933617897331715, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.413580894470215, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2766164243221283, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.26155948638916, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.21431373059749603, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.95285415649414, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01485259085893631, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.58083438873291, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04534691199660301, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.044313430786133, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.09238847345113754, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.187687873840332, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.3050750195980072, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.611330032348633, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.13075096905231476, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.316879987716675, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.017985261976718903, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18027234077453613, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0025518012698739767, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.29717254638672, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00512923626229167, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.628528594970703, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.13240854442119598, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.848450660705566, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09399112313985825, "pnorm/_forward_module.model.norm.weight": 21.06087875366211, "gnorm/_forward_module.model.norm.weight": 0.011922747828066349, "pnorm/_forward_module.lm_head.weight": 215.62326049804688, "gnorm/_forward_module.lm_head.weight": 0.05497278273105621}
{"step": 922746880, "pnorm/_forward_module.model.embeddings.weight": 101.64248657226562, "gnorm/_forward_module.model.embeddings.weight": 0.19082310795783997, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.947284698486328, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007863345555961132, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.6219658851623535, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.021198788657784462, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.6508097648620605, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.021913129836320877, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.755426406860352, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3009895980358124, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.723777770996094, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.381212055683136, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2862111330032349, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02379879727959633, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.15912340581417084, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0016216224757954478, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.921552658081055, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006690130103379488, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.403554916381836, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.24088314175605774, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.254583358764648, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1815287172794342, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 16.98207664489746, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01192416436970234, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.66530704498291, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.035881854593753815, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.093770027160645, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.09240707755088806, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.20151948928833, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.289766401052475, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.638235569000244, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.13748212158679962, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.326849937438965, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013054713606834412, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18095125257968903, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018752665491774678, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.299930572509766, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0070016393437981606, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.642023086547852, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.17514893412590027, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.855536460876465, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.13465356826782227, "pnorm/_forward_module.model.norm.weight": 21.14870834350586, "gnorm/_forward_module.model.norm.weight": 0.007644960191100836, "pnorm/_forward_module.lm_head.weight": 216.75759887695312, "gnorm/_forward_module.lm_head.weight": 0.06925247609615326}
{"step": 943718400, "pnorm/_forward_module.model.embeddings.weight": 101.82350158691406, "gnorm/_forward_module.model.embeddings.weight": 0.17356620728969574, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.93988037109375, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007010822184383869, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.614771366119385, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.027681749314069748, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.644596099853516, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.027988221496343613, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.751139163970947, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2861108183860779, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.720247745513916, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.351108193397522, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2911932468414307, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.032926637679338455, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.16129165887832642, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002102445112541318, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.91360092163086, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005902737844735384, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.394438743591309, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2197238802909851, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.248025894165039, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.17487017810344696, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.009201049804688, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01172678917646408, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.74010181427002, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03258282691240311, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.137377738952637, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07780226320028305, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.214691638946533, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2573513090610504, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.6639604568481445, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11335118114948273, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3389036655426025, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012234704568982124, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18159765005111694, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001714679878205061, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.30173110961914, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005596610717475414, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.65324592590332, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.14213071763515472, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.861074447631836, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.10636811703443527, "pnorm/_forward_module.model.norm.weight": 21.23310661315918, "gnorm/_forward_module.model.norm.weight": 0.007538175676018, "pnorm/_forward_module.lm_head.weight": 217.8425750732422, "gnorm/_forward_module.lm_head.weight": 0.05098746716976166}
{"step": 964689920, "pnorm/_forward_module.model.embeddings.weight": 101.9930648803711, "gnorm/_forward_module.model.embeddings.weight": 0.20876120030879974, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.932652473449707, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007864234037697315, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.609504699707031, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03434189409017563, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.640050888061523, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03374037146568298, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.746485233306885, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.33770278096199036, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.716505527496338, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.42687711119651794, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2944157123565674, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03733007609844208, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1632675677537918, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.006102441344410181, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.906389236450195, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00912489090114832, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.385597229003906, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2765856385231018, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.241890907287598, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.20960266888141632, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.034765243530273, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01435005385428667, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.810948371887207, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04217510297894478, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.178857803344727, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.12021086364984512, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.226593017578125, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2998025119304657, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.688065052032471, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.13232867419719696, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3504257202148438, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.023664681240916252, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18241749703884125, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.003028205130249262, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.304214477539062, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005487241316586733, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.664689064025879, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.15205077826976776, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.86729907989502, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.11423949152231216, "pnorm/_forward_module.model.norm.weight": 21.31633949279785, "gnorm/_forward_module.model.norm.weight": 0.013164620846509933, "pnorm/_forward_module.lm_head.weight": 218.8914337158203, "gnorm/_forward_module.lm_head.weight": 0.09171262383460999}
{"step": 985661440, "pnorm/_forward_module.model.embeddings.weight": 102.14974212646484, "gnorm/_forward_module.model.embeddings.weight": 0.16487039625644684, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.927387237548828, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007290668785572052, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.604660511016846, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.026181118562817574, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.635951995849609, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.023724831640720367, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.743817329406738, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.29293951392173767, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.714225769042969, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4008455276489258, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.2981969118118286, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03799102082848549, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.16524747014045715, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004527045879513025, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.90011215209961, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.010372592136263847, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.37763786315918, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.30828744173049927, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.236249923706055, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.22842682898044586, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.054792404174805, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.013232003897428513, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.866479873657227, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.05138188228011131, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.213126182556152, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.10030994564294815, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.236813068389893, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.23962000012397766, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.708548545837402, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0963435173034668, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3546767234802246, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011565030552446842, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18273603916168213, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0012512424727901816, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.305910110473633, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0034745591692626476, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.674751281738281, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1003837138414383, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.871770858764648, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08384432643651962, "pnorm/_forward_module.model.norm.weight": 21.39535903930664, "gnorm/_forward_module.model.norm.weight": 0.00875948742032051, "pnorm/_forward_module.lm_head.weight": 219.88034057617188, "gnorm/_forward_module.lm_head.weight": 0.04462669789791107}
{"step": 1006632960, "pnorm/_forward_module.model.embeddings.weight": 102.29605865478516, "gnorm/_forward_module.model.embeddings.weight": 0.17986048758029938, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.92068099975586, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007733121979981661, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.598412990570068, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.027450526133179665, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.630649566650391, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.026769110932946205, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.739149570465088, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3397288918495178, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.71005392074585, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4974135160446167, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3009029626846313, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04087759554386139, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.16694039106369019, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.006430420093238354, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.893454551696777, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.015873372554779053, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.36810302734375, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.40317636728286743, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.229598045349121, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.27948319911956787, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.076663970947266, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.016966352239251137, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.922720909118652, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.06768778711557388, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.246980667114258, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.14271363615989685, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.248295783996582, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.26796334981918335, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.732372760772705, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.12933741509914398, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.361959218978882, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019832491874694824, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18320026993751526, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0037869815714657307, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.308120727539062, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.008340614847838879, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.684333801269531, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.19745224714279175, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.877388000488281, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.14926695823669434, "pnorm/_forward_module.model.norm.weight": 21.473039627075195, "gnorm/_forward_module.model.norm.weight": 0.01367971207946539, "pnorm/_forward_module.lm_head.weight": 220.82443237304688, "gnorm/_forward_module.lm_head.weight": 0.05586986988782883}
{"step": 1027604480, "pnorm/_forward_module.model.embeddings.weight": 102.4328842163086, "gnorm/_forward_module.model.embeddings.weight": 0.14153797924518585, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.9141206741333, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0053754993714392185, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.593119144439697, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020610470324754715, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.626016616821289, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02075980231165886, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.733950138092041, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2275284230709076, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.705578804016113, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.31673505902290344, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3042224645614624, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0268938560038805, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.16871759295463562, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0026769316755235195, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.88742733001709, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007082389667630196, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.359513282775879, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.21420371532440186, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.223776817321777, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.14997003972530365, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.098005294799805, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00915715005248785, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 9.975911140441895, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.027297521010041237, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.279853820800781, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07506737858057022, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.25955057144165, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.21294289827346802, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.756106853485107, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09636084735393524, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3693838119506836, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011870460584759712, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18359726667404175, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014974857913330197, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.30974578857422, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004895792808383703, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.692460060119629, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.12879852950572968, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.881689071655273, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09795743972063065, "pnorm/_forward_module.model.norm.weight": 21.548620223999023, "gnorm/_forward_module.model.norm.weight": 0.0111366156488657, "pnorm/_forward_module.lm_head.weight": 221.7332763671875, "gnorm/_forward_module.lm_head.weight": 0.0535142719745636}
{"step": 1048576000, "pnorm/_forward_module.model.embeddings.weight": 102.56021118164062, "gnorm/_forward_module.model.embeddings.weight": 0.1693047136068344, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.90796947479248, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007335268892347813, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.590385437011719, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020377231761813164, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.623666763305664, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02136346697807312, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.728721618652344, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.31203457713127136, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.70127534866333, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.41276323795318604, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3064018487930298, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.024862375110387802, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17037126421928406, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0019539035856723785, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.881587982177734, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.01187108550220728, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.35114860534668, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.319108247756958, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.218084335327148, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.2408890724182129, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.11786460876465, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.016008788719773293, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.025984764099121, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.06185256689786911, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.311248779296875, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.11557826399803162, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.270578384399414, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.30105558037757874, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.779449462890625, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11350803822278976, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3762025833129883, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.014851836487650871, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18399572372436523, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002400873461738229, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.31165313720703, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004692550748586655, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.701004981994629, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.13482093811035156, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.886446952819824, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1011568009853363, "pnorm/_forward_module.model.norm.weight": 21.62242317199707, "gnorm/_forward_module.model.norm.weight": 0.00772079499438405, "pnorm/_forward_module.lm_head.weight": 222.6094207763672, "gnorm/_forward_module.lm_head.weight": 0.04369160160422325}
{"step": 1069547520, "pnorm/_forward_module.model.embeddings.weight": 102.67849731445312, "gnorm/_forward_module.model.embeddings.weight": 0.19082877039909363, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.90213680267334, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0069014085456728935, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5854268074035645, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.029337555170059204, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.619521141052246, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02587662823498249, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.724156379699707, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.29395750164985657, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.697343349456787, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3541196882724762, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.308714747428894, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03794652968645096, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17158685624599457, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001444804249331355, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.875810623168945, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006337857339531183, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.342477798461914, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.23104946315288544, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.212176322937012, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.18747903406620026, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.136703491210938, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011965814977884293, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.071950912475586, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.027294037863612175, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.340375900268555, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07525932043790817, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.280647277832031, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.29078295826911926, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.801392555236816, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10596686601638794, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3830528259277344, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0188151765614748, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18432655930519104, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0025812797248363495, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.313566207885742, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0043296655640006065, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.708402633666992, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1137683093547821, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.8912935256958, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08484566956758499, "pnorm/_forward_module.model.norm.weight": 21.694154739379883, "gnorm/_forward_module.model.norm.weight": 0.010983165353536606, "pnorm/_forward_module.lm_head.weight": 223.4422149658203, "gnorm/_forward_module.lm_head.weight": 0.05825776234269142}
{"step": 1090519040, "pnorm/_forward_module.model.embeddings.weight": 102.78712463378906, "gnorm/_forward_module.model.embeddings.weight": 0.20217010378837585, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.895978927612305, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0085111940279603, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.581046104431152, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.025045957416296005, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.615750312805176, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02590840682387352, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.719194412231445, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.33764567971229553, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.693104267120361, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.5232690572738647, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.310657024383545, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.026938248425722122, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17312565445899963, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003408517688512802, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.870555877685547, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.018705032765865326, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.33444595336914, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.49030977487564087, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.206950187683105, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.3770294785499573, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.155467987060547, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.023661073297262192, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.116619110107422, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.11396446079015732, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.367951393127441, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.20313650369644165, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.29062032699585, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2700331211090088, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.823119163513184, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11225222796201706, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.388913869857788, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.02665448561310768, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1846686750650406, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.004765696823596954, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.315032958984375, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.006634844932705164, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.715108871459961, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.15584735572338104, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.895450592041016, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1280859261751175, "pnorm/_forward_module.model.norm.weight": 21.763442993164062, "gnorm/_forward_module.model.norm.weight": 0.012544789351522923, "pnorm/_forward_module.lm_head.weight": 224.2252655029297, "gnorm/_forward_module.lm_head.weight": 0.06585974246263504}
{"step": 1111490560, "pnorm/_forward_module.model.embeddings.weight": 102.88382720947266, "gnorm/_forward_module.model.embeddings.weight": 0.2501932978630066, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.893465995788574, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.01106497272849083, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.581554412841797, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.03750867024064064, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.616550922393799, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.03368225321173668, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.7172017097473145, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.44433897733688354, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.691493988037109, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.5391713976860046, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3124750852584839, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.049982912838459015, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1742357760667801, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00895225815474987, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.866301536560059, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.011020827107131481, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.327747344970703, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.3501618206501007, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.202301025390625, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.2728751599788666, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.168426513671875, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.017109055072069168, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.14897632598877, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.05087071284651756, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.388800621032715, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1033000573515892, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.297657012939453, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.3988055884838104, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.837750434875488, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.15205317735671997, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3945491313934326, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.018211262300610542, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18504619598388672, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0021102491300553083, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.3171443939209, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00656572449952364, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.723258018493652, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1738688349723816, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.899774551391602, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.12663249671459198, "pnorm/_forward_module.model.norm.weight": 21.825292587280273, "gnorm/_forward_module.model.norm.weight": 0.007236046716570854, "pnorm/_forward_module.lm_head.weight": 224.94003295898438, "gnorm/_forward_module.lm_head.weight": 0.06637335568666458}
{"step": 1132462080, "pnorm/_forward_module.model.embeddings.weight": 102.96873474121094, "gnorm/_forward_module.model.embeddings.weight": 0.1559503972530365, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.891016006469727, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006524207070469856, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.580652713775635, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019349709153175354, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.61602258682251, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01936171017587185, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.715234279632568, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2740393579006195, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.689601421356201, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.34906846284866333, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3143556118011475, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.023494109511375427, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17545589804649353, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011950285406783223, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.861599922180176, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.009083227254450321, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.320484161376953, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2716090679168701, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.197393417358398, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.20954608917236328, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.182083129882812, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011893347837030888, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.180588722229004, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04656878113746643, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.40943717956543, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.08488254249095917, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.304362773895264, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.24403584003448486, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.852314472198486, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09432264417409897, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.400144100189209, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.014670162461698055, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18529288470745087, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002170954365283251, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.318035125732422, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003032667562365532, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.728605270385742, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09854593127965927, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.902604103088379, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07503321021795273, "pnorm/_forward_module.model.norm.weight": 21.886613845825195, "gnorm/_forward_module.model.norm.weight": 0.008443300612270832, "pnorm/_forward_module.lm_head.weight": 225.6268310546875, "gnorm/_forward_module.lm_head.weight": 0.04605492204427719}
{"step": 1153433600, "pnorm/_forward_module.model.embeddings.weight": 103.05156707763672, "gnorm/_forward_module.model.embeddings.weight": 0.15612585842609406, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.887085914611816, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005690640304237604, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.577545642852783, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019731732085347176, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.613215923309326, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02038753405213356, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.711871147155762, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.25409597158432007, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6867146492004395, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.36728808283805847, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3163366317749023, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.027507316321134567, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17658911645412445, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003947122022509575, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.857359886169434, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.01193216722458601, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.313462257385254, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.3103000521659851, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.192708015441895, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.22500650584697723, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.195913314819336, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.013041067868471146, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.211369514465332, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.05031440779566765, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.428926467895508, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1023046001791954, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.3117995262146, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.25228843092918396, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.868342399597168, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09457438439130783, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.406120538711548, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012620446272194386, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18558672070503235, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0019405399216338992, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.319320678710938, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004176177550107241, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.733711242675781, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.11090563237667084, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.90592098236084, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0983099490404129, "pnorm/_forward_module.model.norm.weight": 21.9486083984375, "gnorm/_forward_module.model.norm.weight": 0.009846093133091927, "pnorm/_forward_module.lm_head.weight": 226.3025665283203, "gnorm/_forward_module.lm_head.weight": 0.04789487645030022}
{"step": 1174405120, "pnorm/_forward_module.model.embeddings.weight": 103.12883758544922, "gnorm/_forward_module.model.embeddings.weight": 0.1419372856616974, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.882060050964355, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004603615030646324, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.57413387298584, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02060488611459732, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.610162734985352, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.020885169506072998, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.707035064697266, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.23249083757400513, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.682711124420166, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3073248565196991, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3179574012756348, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.027879172936081886, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17760980129241943, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004162895958870649, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.852840423583984, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00832675863057375, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.306145668029785, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.22844967246055603, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.18792724609375, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.17454609274864197, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.210737228393555, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009796379134058952, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.24425983428955, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03274759277701378, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.449557304382324, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07083716988563538, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.319865703582764, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.21880894899368286, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.8865966796875, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07812142372131348, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4112679958343506, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009993444196879864, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18579219281673431, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0012294561602175236, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.32147979736328, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0026528793387115, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.739933967590332, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08591285347938538, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.90973949432373, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06370110809803009, "pnorm/_forward_module.model.norm.weight": 22.009458541870117, "gnorm/_forward_module.model.norm.weight": 0.009036269970238209, "pnorm/_forward_module.lm_head.weight": 226.94886779785156, "gnorm/_forward_module.lm_head.weight": 0.035757578909397125}
{"step": 1195376640, "pnorm/_forward_module.model.embeddings.weight": 103.19895935058594, "gnorm/_forward_module.model.embeddings.weight": 0.16093100607395172, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.877222061157227, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006269319914281368, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.572152137756348, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.018578652292490005, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.608633995056152, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.018949970602989197, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.702335357666016, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.26014575362205505, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6786885261535645, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.33814769983291626, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3184679746627808, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.022070901468396187, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17837318778038025, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0020456404890865088, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.848605155944824, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006695673801004887, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.298807144165039, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.23964223265647888, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.18325138092041, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.17733590304851532, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.224916458129883, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011893405579030514, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.275548934936523, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.028149278834462166, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.469714164733887, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07910269498825073, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.327990531921387, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.24589358270168304, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.904640197753906, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09415413439273834, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.415374994277954, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.019618002697825432, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18593524396419525, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.002801851835101843, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.323211669921875, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.005821161903440952, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.745012283325195, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1347561776638031, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.913315773010254, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.11064442992210388, "pnorm/_forward_module.model.norm.weight": 22.06816291809082, "gnorm/_forward_module.model.norm.weight": 0.006446824874728918, "pnorm/_forward_module.lm_head.weight": 227.56544494628906, "gnorm/_forward_module.lm_head.weight": 0.0471494160592556}
{"step": 1216348160, "pnorm/_forward_module.model.embeddings.weight": 103.26216888427734, "gnorm/_forward_module.model.embeddings.weight": 0.19601409137248993, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.8729887008667, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007215828634798527, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.570526123046875, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.023747660219669342, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.607322692871094, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.025270530954003334, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.698278427124023, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.320171594619751, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.67525053024292, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4679461121559143, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3188250064849854, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02741520293056965, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.179341122508049, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003914662171155214, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.844778060913086, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.014351869933307171, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.292243957519531, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.4204959571361542, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.179034233093262, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.32159262895584106, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.237720489501953, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.021546470001339912, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.304530143737793, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.09579608589410782, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.489147186279297, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.17237195372581482, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.335422039031982, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2735241949558258, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.920714378356934, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0979171022772789, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.420008897781372, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.018355663865804672, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18619240820407867, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.003074074862524867, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.32512664794922, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003838050877675414, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.750205993652344, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1107853353023529, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.916634559631348, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08300897479057312, "pnorm/_forward_module.model.norm.weight": 22.124393463134766, "gnorm/_forward_module.model.norm.weight": 0.007111974060535431, "pnorm/_forward_module.lm_head.weight": 228.14535522460938, "gnorm/_forward_module.lm_head.weight": 0.04150500148534775}
{"step": 1237319680, "pnorm/_forward_module.model.embeddings.weight": 103.31824493408203, "gnorm/_forward_module.model.embeddings.weight": 0.15249167382717133, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.869119644165039, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006876194849610329, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.568167209625244, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01645149663090706, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.605643272399902, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017080901190638542, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.694540023803711, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.24109135568141937, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.672000408172607, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.36109429597854614, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3198449611663818, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.019504515454173088, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18012242019176483, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0027866805903613567, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.840935707092285, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.012135584838688374, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.2854585647583, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.3442583680152893, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.174476623535156, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.2656064033508301, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.24998664855957, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.01733585260808468, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.331338882446289, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.07172558456659317, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.507621765136719, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.13316218554973602, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.342595100402832, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.21800048649311066, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.936733245849609, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07379638403654099, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4240949153900146, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010898538865149021, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1863754689693451, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001796509837731719, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.32634735107422, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.002839357126504183, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.754243850708008, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07558145374059677, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.919464111328125, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.062425851821899414, "pnorm/_forward_module.model.norm.weight": 22.178955078125, "gnorm/_forward_module.model.norm.weight": 0.007467786315828562, "pnorm/_forward_module.lm_head.weight": 228.6957550048828, "gnorm/_forward_module.lm_head.weight": 0.035143572837114334}
{"step": 1258291200, "pnorm/_forward_module.model.embeddings.weight": 103.36893463134766, "gnorm/_forward_module.model.embeddings.weight": 0.14352966845035553, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.865558624267578, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00502143707126379, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.566891193389893, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.02071889117360115, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.604807376861572, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02091052196919918, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.690766334533691, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.22926384210586548, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6687469482421875, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.29022783041000366, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.32082998752594, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.025885822251439095, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1808725893497467, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003613131120800972, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.83779239654541, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0060124825686216354, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.279828071594238, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.21348963677883148, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.1707124710083, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.161245197057724, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.260967254638672, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008908871561288834, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.355624198913574, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03161897882819176, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.523452758789062, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06385143101215363, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.3487396240234375, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2205084264278412, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.950456142425537, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08015234023332596, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.427696943283081, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006300655659288168, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18649475276470184, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00040469635860063136, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.32834243774414, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0026505901478230953, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.759354591369629, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08500144630670547, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.922650337219238, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06311117112636566, "pnorm/_forward_module.model.norm.weight": 22.230451583862305, "gnorm/_forward_module.model.norm.weight": 0.006500617600977421, "pnorm/_forward_module.lm_head.weight": 229.21385192871094, "gnorm/_forward_module.lm_head.weight": 0.039977580308914185}
{"step": 1279262720, "pnorm/_forward_module.model.embeddings.weight": 103.41455841064453, "gnorm/_forward_module.model.embeddings.weight": 0.13748426735401154, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.86188793182373, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005250784568488598, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.56583309173584, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020701607689261436, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.603922367095947, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02087489701807499, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.686760902404785, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.20490768551826477, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.665287017822266, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.26018866896629333, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3220733404159546, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02551218681037426, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18181048333644867, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0037790341302752495, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.834797859191895, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005377883091568947, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.274264335632324, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.16394460201263428, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.167102813720703, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13686539232730865, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.2718448638916, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008800630457699299, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.377555847167969, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02308077923953533, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.537931442260742, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05162964388728142, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.354912757873535, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2003174126148224, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.964374542236328, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07412245124578476, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4310851097106934, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011934884823858738, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18663400411605835, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0015136540168896317, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.329374313354492, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003168401075527072, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.76226806640625, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0838395208120346, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.924881935119629, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06583615392446518, "pnorm/_forward_module.model.norm.weight": 22.279447555541992, "gnorm/_forward_module.model.norm.weight": 0.008883069269359112, "pnorm/_forward_module.lm_head.weight": 229.70384216308594, "gnorm/_forward_module.lm_head.weight": 0.033489953726530075}
{"step": 1300234240, "pnorm/_forward_module.model.embeddings.weight": 103.45471954345703, "gnorm/_forward_module.model.embeddings.weight": 0.15033960342407227, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.8580322265625, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005870689172297716, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.564122676849365, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019962724298238754, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.602458953857422, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02021753042936325, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.682665824890137, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.2305290848016739, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.661867141723633, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.36229875683784485, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3226439952850342, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02106187306344509, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18249118328094482, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0012514633126556873, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.831594467163086, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.014070906676352024, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.268144607543945, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.36959943175315857, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.16309642791748, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.2901357412338257, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.28300666809082, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.017655396834015846, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.400958061218262, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.07704304158687592, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.553579330444336, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1399868279695511, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.361227035522461, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.19879098236560822, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.979035377502441, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07693865150213242, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4343628883361816, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.014110813848674297, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18670450150966644, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0024068886414170265, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.330856323242188, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0026676191482692957, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.765628814697266, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08448169380426407, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.927449226379395, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06665822118520737, "pnorm/_forward_module.model.norm.weight": 22.327350616455078, "gnorm/_forward_module.model.norm.weight": 0.007984393276274204, "pnorm/_forward_module.lm_head.weight": 230.17123413085938, "gnorm/_forward_module.lm_head.weight": 0.044610850512981415}
{"step": 1321205760, "pnorm/_forward_module.model.embeddings.weight": 103.48985290527344, "gnorm/_forward_module.model.embeddings.weight": 0.1586742103099823, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.85388469696045, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006464751437306404, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.56231164932251, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01832476072013378, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.601312637329102, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01903490535914898, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.678244113922119, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.28747138381004333, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6582255363464355, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.49656909704208374, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.323103666305542, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.026671158149838448, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1831931620836258, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002754044719040394, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.82864761352539, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.02379566803574562, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.262556076049805, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.5773115158081055, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.159608840942383, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.4283662736415863, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.29410743713379, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.024433070793747902, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.423687934875488, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.12433648854494095, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.568618774414062, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.21799218654632568, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.367772102355957, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.20335431396961212, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 6.993528842926025, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07518452405929565, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4371323585510254, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.021668538451194763, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18678848445415497, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.004277435131371021, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.332304000854492, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003717394545674324, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.7689790725708, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09699637442827225, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.929998397827148, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07333303242921829, "pnorm/_forward_module.model.norm.weight": 22.37343406677246, "gnorm/_forward_module.model.norm.weight": 0.005692000966519117, "pnorm/_forward_module.lm_head.weight": 230.6092071533203, "gnorm/_forward_module.lm_head.weight": 0.0297714713960886}
{"step": 1342177280, "pnorm/_forward_module.model.embeddings.weight": 103.51941680908203, "gnorm/_forward_module.model.embeddings.weight": 0.16822947561740875, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.851408958435059, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005851294379681349, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.56101655960083, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020686153322458267, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.600114345550537, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02084706351161003, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.675510406494141, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.25735533237457275, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.655917644500732, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.34024468064308167, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3244823217391968, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.025718094781041145, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18398837745189667, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002761024981737137, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.82624626159668, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007572263013571501, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.257830619812012, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2481101006269455, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.156598091125488, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.19244922697544098, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.30255699157715, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010616090148687363, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.442078590393066, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03832879662513733, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.581401824951172, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07564505189657211, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.373035430908203, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2591738998889923, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.004525184631348, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08417227864265442, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4397401809692383, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00829541590064764, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18690553307533264, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010298852575942874, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.333314895629883, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0032268352806568146, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.771574974060059, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09218841046094894, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.932046890258789, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07652594149112701, "pnorm/_forward_module.model.norm.weight": 22.41591453552246, "gnorm/_forward_module.model.norm.weight": 0.006820782087743282, "pnorm/_forward_module.lm_head.weight": 231.01124572753906, "gnorm/_forward_module.lm_head.weight": 0.04575946554541588}
{"step": 1363148800, "pnorm/_forward_module.model.embeddings.weight": 103.54528045654297, "gnorm/_forward_module.model.embeddings.weight": 0.14128082990646362, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.848926544189453, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0052526528015732765, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.560962200164795, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019917353987693787, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.600122928619385, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.02231457643210888, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.672504425048828, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.22419096529483795, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.65335750579834, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2977455258369446, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3254438638687134, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.021818142384290695, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1846310794353485, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002565359231084585, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.823596000671387, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00634473143145442, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.252552032470703, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.20930215716362, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.15310287475586, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15206699073314667, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.31096649169922, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008776779286563396, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.460115432739258, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02610628493130207, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.593746185302734, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05865367129445076, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.377760887145996, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.22314226627349854, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.01533317565918, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08555981516838074, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4421350955963135, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010611223056912422, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18695023655891418, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013683864381164312, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.334497451782227, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003641231684014201, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.77426815032959, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.10380375385284424, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.93415355682373, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.07822990417480469, "pnorm/_forward_module.model.norm.weight": 22.45749282836914, "gnorm/_forward_module.model.norm.weight": 0.008198102936148643, "pnorm/_forward_module.lm_head.weight": 231.3944091796875, "gnorm/_forward_module.lm_head.weight": 0.041718531399965286}
{"step": 1384120320, "pnorm/_forward_module.model.embeddings.weight": 103.56842803955078, "gnorm/_forward_module.model.embeddings.weight": 0.13557375967502594, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.845648765563965, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004298862535506487, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5607476234436035, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014816117472946644, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.600045204162598, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.014881886541843414, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.668822288513184, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.19502411782741547, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.650341033935547, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.27465611696243286, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3259904384613037, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.016865991055965424, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18528246879577637, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001418713596649468, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.821333885192871, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0084452573210001, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.248059272766113, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.24259352684020996, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.1503267288208, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.19863130152225494, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.320009231567383, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.012293925508856773, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.478866577148438, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.05207940563559532, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.606327056884766, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.09338826686143875, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.382414817810059, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.20947520434856415, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.026443958282471, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06892281025648117, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4450769424438477, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009700861759483814, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18709808588027954, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013212142512202263, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.33608627319336, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00239845784381032, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.777440071105957, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07506823539733887, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.936391830444336, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05984492227435112, "pnorm/_forward_module.model.norm.weight": 22.497509002685547, "gnorm/_forward_module.model.norm.weight": 0.0077978926710784435, "pnorm/_forward_module.lm_head.weight": 231.75697326660156, "gnorm/_forward_module.lm_head.weight": 0.029190029948949814}
{"step": 1405091840, "pnorm/_forward_module.model.embeddings.weight": 103.58805847167969, "gnorm/_forward_module.model.embeddings.weight": 0.17704421281814575, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.842683792114258, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006420380901545286, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.560210227966309, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.018901217728853226, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599890232086182, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01837713085114956, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.665344715118408, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.3167971670627594, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.647508144378662, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.49669933319091797, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3270504474639893, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.029238147661089897, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18592149019241333, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004887334071099758, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.818957328796387, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.021291321143507957, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.24338436126709, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.544226348400116, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.147345542907715, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.4028022587299347, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.328561782836914, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.023983793333172798, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.496402740478516, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.1121957078576088, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.618083000183105, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1950874626636505, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.386783123016357, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2843721807003021, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.036723613739014, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08969858288764954, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4479660987854004, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015464667230844498, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1871987283229828, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0028115359600633383, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.337688446044922, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004104888066649437, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.780496597290039, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.11050142347812653, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.938718795776367, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08558712154626846, "pnorm/_forward_module.model.norm.weight": 22.535064697265625, "gnorm/_forward_module.model.norm.weight": 0.005655745044350624, "pnorm/_forward_module.lm_head.weight": 232.09449768066406, "gnorm/_forward_module.lm_head.weight": 0.03340433910489082}
{"step": 1426063360, "pnorm/_forward_module.model.embeddings.weight": 103.60350036621094, "gnorm/_forward_module.model.embeddings.weight": 0.16416800022125244, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.840807914733887, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005914686713367701, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5612335205078125, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.020249679684638977, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.601037979125977, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.019892461597919464, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.662753105163574, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.265220046043396, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.64537239074707, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.42388075590133667, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3276550769805908, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.027132300660014153, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1863531917333603, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.007123937364667654, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.817111015319824, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.020887959748506546, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.239326477050781, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.49789759516716003, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.144819259643555, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.38030514121055603, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.33542251586914, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.021844755858182907, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.51185417175293, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.10156505554914474, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.628463745117188, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.1782480627298355, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.390498161315918, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.19144092500209808, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.045079708099365, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.076044462621212, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4502856731414795, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.022066041827201843, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18725645542144775, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.003930238541215658, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.338775634765625, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003231297479942441, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.782342910766602, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08561021089553833, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.940041542053223, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06499693542718887, "pnorm/_forward_module.model.norm.weight": 22.570106506347656, "gnorm/_forward_module.model.norm.weight": 0.005072867032140493, "pnorm/_forward_module.lm_head.weight": 232.4082794189453, "gnorm/_forward_module.lm_head.weight": 0.043206341564655304}
{"step": 1447034880, "pnorm/_forward_module.model.embeddings.weight": 103.61613464355469, "gnorm/_forward_module.model.embeddings.weight": 0.12512144446372986, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.838805198669434, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004742810036987066, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.561387062072754, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01727762073278427, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.601365566253662, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01790538802742958, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.660131454467773, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.21169213950634003, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.643191337585449, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.29025211930274963, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.328096866607666, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.022128432989120483, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18677259981632233, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0026070408057421446, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.815399169921875, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007420595269650221, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.235641479492188, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2268880307674408, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.142457008361816, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.17601577937602997, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.34183120727539, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010919305495917797, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.525917053222656, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04469917714595795, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.638195037841797, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.08313810080289841, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.394107341766357, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1991826891899109, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.053093910217285, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07231798768043518, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4527997970581055, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010432718321681023, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18736669421195984, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0016865036450326443, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.34016227722168, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0024731147568672895, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.784544944763184, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0804297924041748, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.941776275634766, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0575227215886116, "pnorm/_forward_module.model.norm.weight": 22.603429794311523, "gnorm/_forward_module.model.norm.weight": 0.006987396162003279, "pnorm/_forward_module.lm_head.weight": 232.70248413085938, "gnorm/_forward_module.lm_head.weight": 0.03319404274225235}
{"step": 1468006400, "pnorm/_forward_module.model.embeddings.weight": 103.6266098022461, "gnorm/_forward_module.model.embeddings.weight": 0.11784376204013824, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.836238861083984, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004072663839906454, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5607476234436035, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.017260944470763206, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.600841999053955, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01741897128522396, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.65709114074707, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1874098777770996, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.640650749206543, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2448129653930664, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3283668756484985, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018170541152358055, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18723586201667786, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001684588030911982, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.813399314880371, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0047325328923761845, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.231249809265137, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.166819229722023, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.139720916748047, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13513806462287903, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.349088668823242, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00793854147195816, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.541336059570312, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.025467896834015846, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.648601531982422, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.04703597351908684, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.398200988769531, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.18604539334774017, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.062459945678711, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06106140464544296, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.455012321472168, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006485276389867067, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18737132847309113, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004363137704785913, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.341188430786133, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0018950958037748933, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.786161422729492, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06448686122894287, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.943428993225098, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05087348073720932, "pnorm/_forward_module.model.norm.weight": 22.635757446289062, "gnorm/_forward_module.model.norm.weight": 0.006375256460160017, "pnorm/_forward_module.lm_head.weight": 232.97764587402344, "gnorm/_forward_module.lm_head.weight": 0.02495025098323822}
{"step": 1488977920, "pnorm/_forward_module.model.embeddings.weight": 103.63502502441406, "gnorm/_forward_module.model.embeddings.weight": 0.12525595724582672, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.833374977111816, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005176465958356857, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5603928565979, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.016463054344058037, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.6004509925842285, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01690272055566311, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.653639316558838, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.21497507393360138, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.637834548950195, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.3260742425918579, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3284919261932373, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.02111223340034485, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18760965764522552, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0019422966288402677, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.81173324584961, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.010615016333758831, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.227628707885742, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.3044610023498535, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.1375093460083, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.20368997752666473, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.356645584106445, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.011516961269080639, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.557284355163574, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.044732850044965744, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.659327507019043, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.09295615553855896, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.402152061462402, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1934778392314911, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.071730613708496, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08009745180606842, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.457697629928589, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007293563801795244, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18748490512371063, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009519033483229578, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.342748641967773, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00477993069216609, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.788857460021973, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.11772514879703522, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.945601463317871, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.09803363680839539, "pnorm/_forward_module.model.norm.weight": 22.666662216186523, "gnorm/_forward_module.model.norm.weight": 0.005294814705848694, "pnorm/_forward_module.lm_head.weight": 233.23519897460938, "gnorm/_forward_module.lm_head.weight": 0.03734014183282852}
{"step": 1509949440, "pnorm/_forward_module.model.embeddings.weight": 103.64129638671875, "gnorm/_forward_module.model.embeddings.weight": 0.10579540580511093, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.830851554870605, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003935625310987234, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.559988975524902, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.015345334075391293, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599981784820557, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015547151677310467, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.65059757232666, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.17402905225753784, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.635280609130859, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.23006419837474823, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3289215564727783, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018574625253677368, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.187955841422081, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0017780576599761844, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.809961318969727, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005724478978663683, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.224075317382812, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.18553602695465088, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.135333061218262, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13858506083488464, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.363773345947266, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007398849818855524, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.572041511535645, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02210908755660057, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.669160842895508, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.04836197569966316, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.405993938446045, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.17138950526714325, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.080352783203125, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06152492016553879, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.459491491317749, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008609456941485405, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1874905526638031, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007781560416333377, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.343778610229492, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0023288200609385967, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.79022216796875, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0680466890335083, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.947285652160645, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05617404356598854, "pnorm/_forward_module.model.norm.weight": 22.695589065551758, "gnorm/_forward_module.model.norm.weight": 0.00654611736536026, "pnorm/_forward_module.lm_head.weight": 233.4747772216797, "gnorm/_forward_module.lm_head.weight": 0.03232118487358093}
{"step": 1530920960, "pnorm/_forward_module.model.embeddings.weight": 103.64556121826172, "gnorm/_forward_module.model.embeddings.weight": 0.1086268499493599, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.828282356262207, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004230834078043699, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.559451103210449, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.016282713040709496, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599457263946533, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.016602661460638046, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.647617340087891, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.181414395570755, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.63282585144043, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.24864788353443146, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3292105197906494, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.019502364099025726, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1884487271308899, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.004221632145345211, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.808218002319336, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00768211530521512, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.220748901367188, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.20852912962436676, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.133333206176758, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.159249410033226, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.371028900146484, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007205495145171881, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.587486267089844, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02962147630751133, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.679491996765137, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0605367049574852, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.409473896026611, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.16743820905685425, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.088663101196289, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06256183981895447, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4615681171417236, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010568764992058277, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18757808208465576, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014556868700310588, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.345083236694336, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003678097389638424, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.792156219482422, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.08591969311237335, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.94920825958252, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0739172175526619, "pnorm/_forward_module.model.norm.weight": 22.722774505615234, "gnorm/_forward_module.model.norm.weight": 0.00814593955874443, "pnorm/_forward_module.lm_head.weight": 233.69757080078125, "gnorm/_forward_module.lm_head.weight": 0.031673118472099304}
{"step": 1551892480, "pnorm/_forward_module.model.embeddings.weight": 103.64827728271484, "gnorm/_forward_module.model.embeddings.weight": 0.16430118680000305, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.826019287109375, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007219821680337191, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.55959939956665, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.019968243315815926, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599719047546387, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.019668856635689735, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.644834995269775, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.27380499243736267, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.630554676055908, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.4796332120895386, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3293273448944092, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.03253737464547157, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18879783153533936, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.008070887066423893, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.806293487548828, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.022655915468931198, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.217239379882812, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.5860257148742676, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.131220817565918, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.4501757025718689, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.378009796142578, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.02529250644147396, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.601571083068848, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.12166523933410645, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.68887710571289, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.22139380872249603, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.41292667388916, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.2025134563446045, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.096667766571045, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06709547340869904, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4638869762420654, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.02186456508934498, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18761523067951202, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0037069516256451607, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.346445083618164, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.002364952117204666, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.794303894042969, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.07117582112550735, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.951177597045898, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05694448947906494, "pnorm/_forward_module.model.norm.weight": 22.7482967376709, "gnorm/_forward_module.model.norm.weight": 0.006440988276153803, "pnorm/_forward_module.lm_head.weight": 233.9044647216797, "gnorm/_forward_module.lm_head.weight": 0.024666497483849525}
{"step": 1572864000, "pnorm/_forward_module.model.embeddings.weight": 103.6497573852539, "gnorm/_forward_module.model.embeddings.weight": 0.09805700927972794, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.823989868164062, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003167147282510996, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.559340953826904, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01673746295273304, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599497318267822, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015958191826939583, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.642392158508301, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.15308383107185364, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.628547191619873, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.21155616641044617, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3295456171035767, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.017618736252188683, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18909648060798645, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0012121128384023905, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.805100440979004, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005179642699658871, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.214725494384766, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1794663965702057, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.129681587219238, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.12364791333675385, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.38364601135254, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0057256221771240234, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.613346099853516, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02637496218085289, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.697161674499512, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.045990683138370514, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.416005611419678, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1349646896123886, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.103688716888428, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05720939859747887, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.465440273284912, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0069733960554003716, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18765072524547577, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006730034365318716, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.347442626953125, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.002268356503918767, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.79577922821045, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06721076369285583, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.952780723571777, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05392918735742569, "pnorm/_forward_module.model.norm.weight": 22.772043228149414, "gnorm/_forward_module.model.norm.weight": 0.007601771969348192, "pnorm/_forward_module.lm_head.weight": 234.09507751464844, "gnorm/_forward_module.lm_head.weight": 0.03114181011915207}
{"step": 1593835520, "pnorm/_forward_module.model.embeddings.weight": 103.65003967285156, "gnorm/_forward_module.model.embeddings.weight": 0.0958380326628685, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.822370529174805, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0033017899841070175, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5591959953308105, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0152738681063056, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599560737609863, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015353784896433353, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.640135288238525, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.15097065269947052, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.626777648925781, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.21059705317020416, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.330203652381897, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018223002552986145, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1895022988319397, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007130720769055188, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.80378246307373, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005082730669528246, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.211938858032227, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1786067634820938, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.127945899963379, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.13163448870182037, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.388927459716797, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007344543002545834, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.624950408935547, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.029046395793557167, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.705277442932129, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05787286534905434, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.418876647949219, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.12829141318798065, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.109954357147217, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.054928310215473175, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4665684700012207, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0048477305099368095, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1876564621925354, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005236894357949495, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.34846305847168, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0027034906670451164, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.797205924987793, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06717079877853394, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.954315185546875, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.055271293967962265, "pnorm/_forward_module.model.norm.weight": 22.794109344482422, "gnorm/_forward_module.model.norm.weight": 0.005736818537116051, "pnorm/_forward_module.lm_head.weight": 234.2694549560547, "gnorm/_forward_module.lm_head.weight": 0.027238210663199425}
{"step": 1614807040, "pnorm/_forward_module.model.embeddings.weight": 103.64924621582031, "gnorm/_forward_module.model.embeddings.weight": 0.09193377941846848, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.820491790771484, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003798572113737464, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.5592498779296875, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013003661297261715, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599783420562744, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013490861281752586, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.637657165527344, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.15514330565929413, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.624810695648193, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.2248162031173706, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3303935527801514, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014461982063949108, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18967504799365997, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0015853133518248796, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.802528381347656, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007419890724122524, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.20942211151123, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.2101893275976181, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.126388549804688, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15857809782028198, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.394420623779297, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008675494231283665, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.636146545410156, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.04257776215672493, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.713037490844727, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07534719258546829, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.421795845031738, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1400366723537445, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.11634635925293, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05608321353793144, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4684157371520996, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0074734860099852085, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1877119094133377, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0011649065418168902, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.349689483642578, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0023951921612024307, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.798954963684082, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06642499566078186, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.955988883972168, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05295579880475998, "pnorm/_forward_module.model.norm.weight": 22.814533233642578, "gnorm/_forward_module.model.norm.weight": 0.005907109938561916, "pnorm/_forward_module.lm_head.weight": 234.429443359375, "gnorm/_forward_module.lm_head.weight": 0.028542593121528625}
{"step": 1635778560, "pnorm/_forward_module.model.embeddings.weight": 103.64778137207031, "gnorm/_forward_module.model.embeddings.weight": 0.10122296214103699, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.818528175354004, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0032898082863539457, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.559138298034668, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.015721391886472702, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599639415740967, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.014972168952226639, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.635383605957031, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.15135514736175537, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.622967720031738, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.22233644127845764, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3303550481796265, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.017947262153029442, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18984106183052063, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0016764559550210834, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.801271438598633, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005142096430063248, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.206975936889648, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.17686349153518677, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.124924659729004, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.12599635124206543, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.40010643005371, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.007029845844954252, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.647396087646484, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.025393398478627205, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.720939636230469, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05388186126947403, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.42457914352417, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.15268011391162872, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.1225738525390625, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.054625801742076874, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4702341556549072, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00843189936131239, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18776527047157288, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001091569778509438, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.350820541381836, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00209855567663908, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.800604820251465, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06219576671719551, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.957615852355957, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.048637814819812775, "pnorm/_forward_module.model.norm.weight": 22.833410263061523, "gnorm/_forward_module.model.norm.weight": 0.006782717537134886, "pnorm/_forward_module.lm_head.weight": 234.5764923095703, "gnorm/_forward_module.lm_head.weight": 0.02602948434650898}
{"step": 1656750080, "pnorm/_forward_module.model.embeddings.weight": 103.64561462402344, "gnorm/_forward_module.model.embeddings.weight": 0.09801327437162399, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.816635131835938, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0034742476418614388, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558749198913574, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014659232459962368, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599276542663574, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.014489964582026005, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.633196830749512, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.15535297989845276, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.621146202087402, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.22696048021316528, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3305662870407104, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018990658223628998, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19013448059558868, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003102600108832121, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.80008602142334, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007771460339426994, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.204726219177246, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.21997599303722382, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.123587608337402, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1731049120426178, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.405380249023438, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.010205782018601894, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.658011436462402, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.042745549231767654, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.72813606262207, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.08022693544626236, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.4272027015686035, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.13395126163959503, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.128351211547852, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04947648197412491, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4718940258026123, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00734157906845212, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18779070675373077, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001089382218196988, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.3515682220459, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015134315472096205, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.80158519744873, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05429365858435631, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.958773612976074, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04362107440829277, "pnorm/_forward_module.model.norm.weight": 22.85065269470215, "gnorm/_forward_module.model.norm.weight": 0.006834856234490871, "pnorm/_forward_module.lm_head.weight": 234.70916748046875, "gnorm/_forward_module.lm_head.weight": 0.02419815957546234}
{"step": 1677721600, "pnorm/_forward_module.model.embeddings.weight": 103.6429443359375, "gnorm/_forward_module.model.embeddings.weight": 0.09187097102403641, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.815103530883789, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0030010004993528128, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.559019565582275, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013558976352214813, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599661350250244, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013690986670553684, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.631175518035889, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.14171825349330902, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.619488716125488, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.18821211159229279, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3308366537094116, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.015133887529373169, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1903391182422638, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011568169575184584, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.798959732055664, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0041819303296506405, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.20253849029541, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.13850583136081696, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.122276306152344, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10360332578420639, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.409862518310547, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006079886574298143, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.666851997375488, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.019848648458719254, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.734127044677734, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.03596179187297821, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.429611682891846, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1346137374639511, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.133304595947266, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04960794374346733, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.472990036010742, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005139954853802919, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.187814861536026, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00042486831080168486, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.352628707885742, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0017375074094161391, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.802957534790039, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05495809391140938, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.960158348083496, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.044986542314291, "pnorm/_forward_module.model.norm.weight": 22.866422653198242, "gnorm/_forward_module.model.norm.weight": 0.00649062916636467, "pnorm/_forward_module.lm_head.weight": 234.8295135498047, "gnorm/_forward_module.lm_head.weight": 0.026147395372390747}
{"step": 1698693120, "pnorm/_forward_module.model.embeddings.weight": 103.63983154296875, "gnorm/_forward_module.model.embeddings.weight": 0.08289172500371933, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.813396453857422, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003003776539117098, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558685302734375, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013911940157413483, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599318027496338, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013745193369686604, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.629131317138672, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13471059501171112, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.617843151092529, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.20130059123039246, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.330912709236145, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.017752328887581825, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19047978520393372, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002492929343134165, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.798121452331543, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0073151253163814545, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.200970649719238, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.20506422221660614, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.121321678161621, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15628722310066223, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.414316177368164, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008878161199390888, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.676041603088379, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.038248687982559204, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.740294456481934, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06784962862730026, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.431838035583496, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1065603494644165, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.137962818145752, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.043197136372327805, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4741923809051514, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006480084266513586, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18788619339466095, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000964281614869833, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.353567123413086, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014282825868576765, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.804049491882324, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04804975911974907, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.961276054382324, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04032106325030327, "pnorm/_forward_module.model.norm.weight": 22.880794525146484, "gnorm/_forward_module.model.norm.weight": 0.006505224853754044, "pnorm/_forward_module.lm_head.weight": 234.9394989013672, "gnorm/_forward_module.lm_head.weight": 0.02262982167303562}
{"step": 1719664640, "pnorm/_forward_module.model.embeddings.weight": 103.63652801513672, "gnorm/_forward_module.model.embeddings.weight": 0.08616653829813004, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.811971664428711, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002701058518141508, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558584690093994, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013308503665030003, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599186897277832, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013127843849360943, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.627389907836914, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12540312111377716, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.616451263427734, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.17013874650001526, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3311599493026733, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014318534173071384, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19065962731838226, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0008342101355083287, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.797104835510254, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003616980044171214, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.199065208435059, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.13767307996749878, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.120231628417969, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10778291523456573, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.4185733795166, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.005613071843981743, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.684854507446289, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.024209287017583847, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.746406555175781, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0433703288435936, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.433850288391113, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.10258380323648453, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.142332553863525, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04153333231806755, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4753644466400146, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004316170699894428, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18791471421718597, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0002737058384809643, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.354385375976562, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0017775761662051082, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.804932594299316, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05081931874155998, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.962381362915039, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04282099008560181, "pnorm/_forward_module.model.norm.weight": 22.893892288208008, "gnorm/_forward_module.model.norm.weight": 0.005970904603600502, "pnorm/_forward_module.lm_head.weight": 235.0376434326172, "gnorm/_forward_module.lm_head.weight": 0.02442299760878086}
{"step": 1740636160, "pnorm/_forward_module.model.embeddings.weight": 103.63321685791016, "gnorm/_forward_module.model.embeddings.weight": 0.08034508675336838, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.810554504394531, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002530248137190938, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558361530303955, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01239805854856968, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.599029541015625, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012525309808552265, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.62567663192749, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12018518149852753, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.615057468414307, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.15995703637599945, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3314000368118286, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01195178646594286, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1908220499753952, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006484670448116958, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.796159744262695, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0033453747164458036, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.197332382202148, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12037700414657593, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.11922836303711, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08912774175405502, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.42261505126953, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004254516214132309, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.692900657653809, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.016869550570845604, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.751951217651367, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0318860299885273, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.435722827911377, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09951648861169815, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.146397590637207, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04257418215274811, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4766547679901123, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004464464262127876, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18795162439346313, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005283643840812147, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.355382919311523, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015788457822054625, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.806079864501953, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04727236554026604, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.963574409484863, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.040075283497571945, "pnorm/_forward_module.model.norm.weight": 22.905685424804688, "gnorm/_forward_module.model.norm.weight": 0.006686368957161903, "pnorm/_forward_module.lm_head.weight": 235.12477111816406, "gnorm/_forward_module.lm_head.weight": 0.022243894636631012}
{"step": 1761607680, "pnorm/_forward_module.model.embeddings.weight": 103.62994384765625, "gnorm/_forward_module.model.embeddings.weight": 0.08266036212444305, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.809195518493652, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003152470337226987, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558084487915039, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013165130279958248, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.5987958908081055, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01368713565170765, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.624079704284668, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1293638050556183, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.613790988922119, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1849217414855957, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.331654667854309, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01663072407245636, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19101576507091522, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0023895364720374346, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.795228004455566, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005826966371387243, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.195724487304688, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.163690984249115, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.118237495422363, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1211843490600586, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.42642593383789, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00721492525190115, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.700177192687988, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03144155442714691, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.756881713867188, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.056844890117645264, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.4376912117004395, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1133141741156578, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.150362968444824, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.045510418713092804, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.477745771408081, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0078266067430377, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18798556923866272, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010078295599669218, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.356163024902344, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0017375126481056213, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.806928634643555, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05109648033976555, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.964497566223145, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.042152296751737595, "pnorm/_forward_module.model.norm.weight": 22.91620445251465, "gnorm/_forward_module.model.norm.weight": 0.006398769095540047, "pnorm/_forward_module.lm_head.weight": 235.2021484375, "gnorm/_forward_module.lm_head.weight": 0.023734936490654945}
{"step": 1782579200, "pnorm/_forward_module.model.embeddings.weight": 103.62677764892578, "gnorm/_forward_module.model.embeddings.weight": 0.0847160667181015, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.807971000671387, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0026793747674673796, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558176040649414, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012961114756762981, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598979949951172, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013274489901959896, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.622526168823242, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12791311740875244, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.612554550170898, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.17203935980796814, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3315733671188354, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.013847611844539642, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19108846783638, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000871551688760519, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.794537544250488, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004626357927918434, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.194501876831055, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.13932938873767853, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.11755657196045, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1003355085849762, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.429855346679688, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.005592132918536663, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.706772804260254, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.022851044312119484, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.76142692565918, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0442795492708683, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.43936014175415, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09573189914226532, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.153636932373047, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.042640820145606995, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4790565967559814, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00881385337561369, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18806280195713043, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0012303710682317615, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.356739044189453, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012383325956761837, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.807534217834473, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.046876709908246994, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.965326309204102, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03693182393908501, "pnorm/_forward_module.model.norm.weight": 22.925498962402344, "gnorm/_forward_module.model.norm.weight": 0.00626926077529788, "pnorm/_forward_module.lm_head.weight": 235.2700653076172, "gnorm/_forward_module.lm_head.weight": 0.0240127295255661}
{"step": 1803550720, "pnorm/_forward_module.model.embeddings.weight": 103.62367248535156, "gnorm/_forward_module.model.embeddings.weight": 0.08644232153892517, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.80698013305664, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002761433832347393, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.558069229125977, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013077820651233196, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598880767822266, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012820112518966198, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.621295928955078, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12843948602676392, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.61152458190918, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.17604312300682068, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.331707239151001, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014287545345723629, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19121916592121124, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000933428353164345, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.793768882751465, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004748815204948187, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.19313907623291, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1508481800556183, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.116776466369629, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11394888907670975, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.432863235473633, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.006815133150666952, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.712421417236328, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.02883102372288704, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.765277862548828, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05277755856513977, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.440796375274658, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.10800641775131226, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.156469345092773, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04153113439679146, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4798941612243652, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005681001581251621, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1880849152803421, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008494788780808449, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.357341766357422, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015076639829203486, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.808149337768555, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04718569666147232, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.966064453125, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03755795583128929, "pnorm/_forward_module.model.norm.weight": 22.93364143371582, "gnorm/_forward_module.model.norm.weight": 0.006403029430657625, "pnorm/_forward_module.lm_head.weight": 235.32948303222656, "gnorm/_forward_module.lm_head.weight": 0.023064803332090378}
{"step": 1824522240, "pnorm/_forward_module.model.embeddings.weight": 103.62081909179688, "gnorm/_forward_module.model.embeddings.weight": 0.07803239673376083, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.806035041809082, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002666782820597291, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557705402374268, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012948189862072468, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598526954650879, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013323846273124218, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.620242595672607, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11938741058111191, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.610647201538086, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1609494388103485, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3318712711334229, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.016500135883688927, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1913725882768631, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0027964776381850243, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.793034553527832, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002890744712203741, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.191913604736328, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11881794035434723, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.116046905517578, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08902058750391006, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.43550682067871, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004788435064256191, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.717662811279297, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.020493770018219948, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.76888656616211, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.03535052016377449, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.442033290863037, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09765446931123734, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.158902168273926, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.042876023799180984, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.480487108230591, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0042640226893126965, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18808166682720184, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00037256808718666434, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.357912063598633, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013814476551488042, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.808700561523438, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.047342877835035324, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.96677017211914, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03658328950405121, "pnorm/_forward_module.model.norm.weight": 22.940780639648438, "gnorm/_forward_module.model.norm.weight": 0.006682043895125389, "pnorm/_forward_module.lm_head.weight": 235.38096618652344, "gnorm/_forward_module.lm_head.weight": 0.022811653092503548}
{"step": 1845493760, "pnorm/_forward_module.model.embeddings.weight": 103.61817932128906, "gnorm/_forward_module.model.embeddings.weight": 0.07782446593046188, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.80517578125, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002504454692825675, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557565689086914, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01340517494827509, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.59840726852417, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013442954048514366, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.619232177734375, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11152467131614685, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.60983419418335, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14752493798732758, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3319146633148193, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.015880784019827843, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19147954881191254, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0012307025026530027, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.792512893676758, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003648537676781416, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.190972328186035, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.12230085581541061, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.115557670593262, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08983058482408524, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.43797492980957, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00516059435904026, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.72248649597168, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.023044586181640625, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.772146224975586, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.04150168597698212, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.443068027496338, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08055692911148071, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.161004066467285, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03693225607275963, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.481091022491455, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005622027907520533, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.188117116689682, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007476043538190424, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.358348846435547, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00130397395696491, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.809069633483887, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04187049716711044, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.967272758483887, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.033786118030548096, "pnorm/_forward_module.model.norm.weight": 22.946928024291992, "gnorm/_forward_module.model.norm.weight": 0.006158347241580486, "pnorm/_forward_module.lm_head.weight": 235.42483520507812, "gnorm/_forward_module.lm_head.weight": 0.021438485011458397}
{"step": 1866465280, "pnorm/_forward_module.model.embeddings.weight": 103.61577606201172, "gnorm/_forward_module.model.embeddings.weight": 0.07993628829717636, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.80434799194336, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0028138342313468456, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557397842407227, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01365803461521864, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598264694213867, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013576099649071693, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.618297100067139, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12042068690061569, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6091084480285645, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.18978054821491241, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3319056034088135, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.016098957508802414, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19156663119792938, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0025589519646018744, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.792078018188477, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.007882699370384216, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.190186500549316, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.20941570401191711, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.115150451660156, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.15496282279491425, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.440139770507812, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0076581635512411594, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.726560592651367, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.041278716176748276, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.774916648864746, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.07510469108819962, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.444084167480469, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07891583442687988, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.163023471832275, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03708941861987114, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4814817905426025, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005036697257310152, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18812641501426697, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007062721415422857, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.358779907226562, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001254483824595809, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.809483528137207, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.041627656668424606, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.967809677124023, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03262992948293686, "pnorm/_forward_module.model.norm.weight": 22.952165603637695, "gnorm/_forward_module.model.norm.weight": 0.005531106609851122, "pnorm/_forward_module.lm_head.weight": 235.46200561523438, "gnorm/_forward_module.lm_head.weight": 0.020652441307902336}
{"step": 1887436800, "pnorm/_forward_module.model.embeddings.weight": 103.61371612548828, "gnorm/_forward_module.model.embeddings.weight": 0.0787525326013565, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.80378532409668, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0024498358834534883, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557407855987549, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012692399322986603, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.5982584953308105, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012882711365818977, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.617563724517822, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11572905629873276, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.608509063720703, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.15476872026920319, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3319425582885742, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014326036907732487, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19161003828048706, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007170020253397524, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.79173469543457, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0031240398529917, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.1895751953125, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11988247185945511, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.114808082580566, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08904101699590683, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.441884994506836, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0050697787664830685, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.729778289794922, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.022221332415938377, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.777134895324707, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.041143305599689484, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.444902420043945, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0933777242898941, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.164642810821533, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03839138522744179, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.481956958770752, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005228027235716581, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18815241754055023, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005156538682058454, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.35912322998047, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014631884405389428, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.809797286987305, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04516245424747467, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.968246459960938, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.037380848079919815, "pnorm/_forward_module.model.norm.weight": 22.95655059814453, "gnorm/_forward_module.model.norm.weight": 0.006503382232040167, "pnorm/_forward_module.lm_head.weight": 235.49302673339844, "gnorm/_forward_module.lm_head.weight": 0.022430241107940674}
{"step": 1908408320, "pnorm/_forward_module.model.embeddings.weight": 103.6119155883789, "gnorm/_forward_module.model.embeddings.weight": 0.061752982437610626, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.803231239318848, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0020764567889273167, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557451248168945, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011782156303524971, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598294734954834, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012392633594572544, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.616882801055908, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09227524697780609, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.607944488525391, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12089045345783234, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.331900954246521, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01159113459289074, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1916564553976059, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011744694784283638, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.791444778442383, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002493108855560422, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.189059257507324, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0896855816245079, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.114507675170898, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06430362164974213, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.443410873413086, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.003353995969519019, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.732558250427246, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01345506589859724, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.778997421264648, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.02587984688580036, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.445626258850098, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06454334408044815, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.1660308837890625, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.032875481992959976, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4823720455169678, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005053477827459574, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18816712498664856, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005001687677577138, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.359472274780273, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001185974688269198, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810127258300781, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.036627646535634995, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.968609809875488, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.030427148565649986, "pnorm/_forward_module.model.norm.weight": 22.960145950317383, "gnorm/_forward_module.model.norm.weight": 0.006145712919533253, "pnorm/_forward_module.lm_head.weight": 235.5187530517578, "gnorm/_forward_module.lm_head.weight": 0.02056948095560074}
{"step": 1929379840, "pnorm/_forward_module.model.embeddings.weight": 103.6103744506836, "gnorm/_forward_module.model.embeddings.weight": 0.06788242608308792, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.802740097045898, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0021449842024594545, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557320594787598, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01187476608902216, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598170280456543, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012246665544807911, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.616305828094482, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0958091989159584, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.607468605041504, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1319165974855423, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3320486545562744, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011768150143325329, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19176477193832397, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00030450208578258753, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.791230201721191, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003427924122661352, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.188652992248535, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1199691891670227, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.114307403564453, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08750760555267334, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.444684982299805, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0047916523180902, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.734820365905762, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.019402174279093742, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.780488967895508, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.03621654585003853, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.446170806884766, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06654224544763565, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.16710090637207, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.033729538321495056, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4825797080993652, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004324703477323055, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18817363679409027, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005243554478511214, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.35974884033203, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001221492770127952, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810369491577148, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03856181353330612, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.968941688537598, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.031818464398384094, "pnorm/_forward_module.model.norm.weight": 22.96303367614746, "gnorm/_forward_module.model.norm.weight": 0.0060455938801169395, "pnorm/_forward_module.lm_head.weight": 235.5392303466797, "gnorm/_forward_module.lm_head.weight": 0.02077570930123329}
{"step": 1950351360, "pnorm/_forward_module.model.embeddings.weight": 103.609130859375, "gnorm/_forward_module.model.embeddings.weight": 0.06362485885620117, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.802382469177246, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0020950071047991514, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557267189025879, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011534147895872593, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598130702972412, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012018506415188313, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.615850448608398, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08980406820774078, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.6070942878723145, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12246444821357727, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.332080364227295, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.012496226467192173, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19180820882320404, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007995864725671709, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.790985107421875, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0024598930031061172, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.188214302062988, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09649191051721573, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.114068031311035, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06951024383306503, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.445756912231445, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0035883313976228237, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.73677921295166, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01609273999929428, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.781790733337402, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.029874471947550774, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.446654319763184, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06144121661782265, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.168004035949707, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.032119497656822205, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4828810691833496, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005290155299007893, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1881972998380661, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005442544352263212, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.359966278076172, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010231242049485445, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810540199279785, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03668520972132683, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.9691743850708, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.028973815962672234, "pnorm/_forward_module.model.norm.weight": 22.96527862548828, "gnorm/_forward_module.model.norm.weight": 0.006038870196789503, "pnorm/_forward_module.lm_head.weight": 235.55499267578125, "gnorm/_forward_module.lm_head.weight": 0.020749123767018318}
{"step": 1971322880, "pnorm/_forward_module.model.embeddings.weight": 103.60818481445312, "gnorm/_forward_module.model.embeddings.weight": 0.06367079168558121, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.802074432373047, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002049713861197233, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557317733764648, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011804136447608471, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598179817199707, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012074603699147701, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.6154704093933105, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09126151353120804, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.606782913208008, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12453517317771912, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.332045316696167, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.015808310359716415, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19182562828063965, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0029649233911186457, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.790837287902832, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0029986808076500893, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.187959671020508, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.10028615593910217, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.113934516906738, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06933270394802094, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.446596145629883, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004000569693744183, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.738297462463379, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.014700490050017834, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.782800674438477, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.029828723520040512, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.447035789489746, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.062005415558815, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.168659687042236, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03214764967560768, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.483037233352661, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0042708683758974075, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18819640576839447, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00044440108467824757, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.360212326049805, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011343928053975105, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810802459716797, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03723415359854698, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.969441413879395, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03177588805556297, "pnorm/_forward_module.model.norm.weight": 22.96697998046875, "gnorm/_forward_module.model.norm.weight": 0.006746912375092506, "pnorm/_forward_module.lm_head.weight": 235.56678771972656, "gnorm/_forward_module.lm_head.weight": 0.020924439653754234}
{"step": 1992294400, "pnorm/_forward_module.model.embeddings.weight": 103.60746765136719, "gnorm/_forward_module.model.embeddings.weight": 0.06185843050479889, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.801860809326172, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001964928349480033, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557354927062988, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011929317377507687, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598210334777832, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012258412316441536, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.615202903747559, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09008197486400604, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.606560707092285, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12035718560218811, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.332025170326233, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.013991420157253742, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19182667136192322, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001999249681830406, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.790685653686523, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002769144019111991, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.187726020812988, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09968473017215729, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.11380386352539, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07081795483827591, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.44725227355957, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004237509798258543, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.73953914642334, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.016942961141467094, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.7836275100708, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.032075539231300354, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.447328567504883, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0639648288488388, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.169172763824463, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.031597595661878586, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4831483364105225, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0039054376538842916, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18819762766361237, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004114302573725581, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.36037254333496, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010444376384839416, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810956001281738, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0369117334485054, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.969614028930664, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03093593195080757, "pnorm/_forward_module.model.norm.weight": 22.96820831298828, "gnorm/_forward_module.model.norm.weight": 0.006395900622010231, "pnorm/_forward_module.lm_head.weight": 235.57528686523438, "gnorm/_forward_module.lm_head.weight": 0.02055657096207142}
{"step": 2013265920, "pnorm/_forward_module.model.embeddings.weight": 103.60697937011719, "gnorm/_forward_module.model.embeddings.weight": 0.06173189729452133, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.80172061920166, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0019464956130832434, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557343482971191, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01215201523154974, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598194122314453, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012105308473110199, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.615021228790283, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08785746991634369, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.606417179107666, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11702943593263626, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3320688009262085, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.012953666038811207, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19185538589954376, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0013159217778593302, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.790581703186035, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023759861942380667, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.187564849853516, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0904967188835144, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.113709449768066, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05973035469651222, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.44770622253418, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0035065789707005024, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.740375518798828, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.012835507281124592, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.784173965454102, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.026509685441851616, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.447534084320068, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05482323095202446, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.169529914855957, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.031052442267537117, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4832701683044434, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005124165676534176, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18820630013942719, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00047484718379564583, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.36043357849121, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001084566698409617, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.810989379882812, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.036283109337091446, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.969686508178711, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.030498795211315155, "pnorm/_forward_module.model.norm.weight": 22.96903419494629, "gnorm/_forward_module.model.norm.weight": 0.006242914590984583, "pnorm/_forward_module.lm_head.weight": 235.58099365234375, "gnorm/_forward_module.lm_head.weight": 0.020710887387394905}
{"step": 2034237440, "pnorm/_forward_module.model.embeddings.weight": 103.60668182373047, "gnorm/_forward_module.model.embeddings.weight": 0.06604793667793274, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.801628112792969, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0020495743956416845, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557305812835693, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011613378301262856, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.5981526374816895, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012156281620264053, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.614917755126953, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09165652841329575, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.606335639953613, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1242016851902008, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3320958614349365, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014143591746687889, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19187231361865997, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001801070524379611, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.790509223937988, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0035877081099897623, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.187442779541016, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11307763308286667, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.113639831542969, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08118963986635208, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.447982788085938, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.00433358084410429, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.740877151489258, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.019295724108815193, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.784507751464844, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.033615726977586746, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.447664260864258, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.054559558629989624, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.169749736785889, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03081623837351799, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.483349561691284, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006042997352778912, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18821482360363007, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008770317072048783, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.360490798950195, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009369998006150126, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.811039924621582, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03558822721242905, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.969745635986328, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.028734931722283363, "pnorm/_forward_module.model.norm.weight": 22.96953582763672, "gnorm/_forward_module.model.norm.weight": 0.006650272291153669, "pnorm/_forward_module.lm_head.weight": 235.58450317382812, "gnorm/_forward_module.lm_head.weight": 0.020221102982759476}
{"step": 2055208960, "pnorm/_forward_module.model.embeddings.weight": 103.60652160644531, "gnorm/_forward_module.model.embeddings.weight": 0.05936156585812569, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 15.8015775680542, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0018662511138245463, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 7.557305812835693, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012016385793685913, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 7.598154067993164, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.011950550600886345, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 5.614856719970703, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08681917935609818, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 5.60628604888916, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11506971716880798, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.3321036100387573, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011788624338805676, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19187764823436737, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011800781358033419, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 15.79047966003418, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023262440226972103, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 14.187394142150879, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08802345395088196, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 10.113612174987793, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05851119011640549, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 17.448129653930664, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0032797076273709536, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.741141319274902, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.013045825064182281, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 8.784686088562012, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.025647467002272606, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 6.447733402252197, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05667996034026146, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 7.169867038726807, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03115263767540455, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.4833855628967285, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.003922324161976576, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.18821613490581512, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00038530403980985284, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 16.36051368713379, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009817547397688031, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 15.811050415039062, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.035792771726846695, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 10.969770431518555, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.028753137215971947, "pnorm/_forward_module.model.norm.weight": 22.969799041748047, "gnorm/_forward_module.model.norm.weight": 0.006390242371708155, "pnorm/_forward_module.lm_head.weight": 235.5863037109375, "gnorm/_forward_module.lm_head.weight": 0.020662635564804077}