diff --git "a/metrics/jsonlines/norm.jsonl" "b/metrics/jsonlines/norm.jsonl" new file mode 100644--- /dev/null +++ "b/metrics/jsonlines/norm.jsonl" @@ -0,0 +1,98 @@ +{"step": 20971520, "pnorm/_forward_module.model.embeddings.weight": 101.81584930419922, "gnorm/_forward_module.model.embeddings.weight": 0.06869454681873322, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.617353439331055, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0016509891720488667, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 10.29729175567627, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.004574252292513847, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 10.30891227722168, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.004276220686733723, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.288520812988281, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04839729890227318, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.308958053588867, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.045711617916822433, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.290590524673462, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0063759456388652325, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.009432925842702389, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0029194196686148643, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.644020080566406, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0010992292081937194, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 25.261024475097656, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03170783072710037, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 17.826223373413086, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.023950468748807907, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.61376953125, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0010983363026753068, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.277763366699219, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0028180419467389584, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.29810905456543, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.002785311546176672, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.281525611877441, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.029189206659793854, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.294316291809082, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.029758155345916748, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.2956374883651733, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012822494842112064, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0038774865679442883, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0011934550711885095, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.629898071289062, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008333436562679708, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 25.243446350097656, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.021816885098814964, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 17.836523056030273, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.016327911987900734, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.617870330810547, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0007926493999548256, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 10.331307411193848, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.0019470715196803212, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 10.299418449401855, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.0018506657797843218, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.290188789367676, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.022145478054881096, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.265390396118164, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.028168268501758575, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.2813464403152466, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010303563438355923, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00432300241664052, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006671957089565694, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.625865936279297, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005588842905126512, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 25.246349334716797, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.01609724946320057, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 17.8576717376709, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.013793759979307652, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.62416648864746, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006023735622875392, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 10.327676773071289, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0012406462337821722, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.303221702575684, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.0011031782487407327, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.276890754699707, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0187190230935812, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.281161308288574, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02795841544866562, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.2862451076507568, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007216785568743944, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.003768933704122901, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0004013867874164134, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.627944946289062, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.00043084961362183094, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 25.22796630859375, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.01563522219657898, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 17.861671447753906, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.01501774974167347, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.628366470336914, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.000659311655908823, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.318748474121094, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0006316258222796023, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.284245491027832, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.0005636459682136774, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.28686237335205, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.018689164891839027, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.314923286437988, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.029897956177592278, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.2906819581985474, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004329283721745014, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.004539692308753729, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002095368254231289, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.633237838745117, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005545919993892312, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 25.215951919555664, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.01834973506629467, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 17.84881019592285, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.0184263177216053, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.629703521728516, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0008004084811545908, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.302168846130371, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0003756276855710894, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.29596996307373, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.0003510359092615545, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.293839454650879, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.022132596001029015, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.319414138793945, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03259901702404022, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.3089790344238281, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0027386643923819065, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.004615094978362322, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00012222363147884607, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.633485794067383, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0007963152602314949, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 25.23259162902832, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02013680897653103, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 17.823854446411133, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.020861024037003517, "pnorm/_forward_module.model.norm.weight": 22.66058349609375, "gnorm/_forward_module.model.norm.weight": 0.0569557286798954, "pnorm/_forward_module.lm_head.weight": 101.96174621582031, "gnorm/_forward_module.lm_head.weight": 0.9876343607902527} +{"step": 41943040, "pnorm/_forward_module.model.embeddings.weight": 103.18702697753906, "gnorm/_forward_module.model.embeddings.weight": 0.19666098058223724, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.655452728271484, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.007538936100900173, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 10.683125495910645, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009629827924072742, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 10.70488166809082, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.00856009405106306, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.505414009094238, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.19911183416843414, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.514557838439941, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.20896027982234955, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.362101674079895, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010439998470246792, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.03535962104797363, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011666429927572608, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.68776512145996, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.006809588987380266, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 25.788516998291016, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.17908670008182526, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.19399642944336, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.14734670519828796, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.609352111816406, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0055291480384767056, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.484845161437988, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009505169466137886, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.512873649597168, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.008511153049767017, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.437174797058105, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1525234878063202, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.449408531188965, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.14439615607261658, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.3927792310714722, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007044985890388489, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0062977164052426815, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003885702171828598, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.64682388305664, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.003475162200629711, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 25.705564498901367, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0988364964723587, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.177331924438477, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.08906685560941696, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.60630989074707, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00349959684535861, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 10.539627075195312, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006151949055492878, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 10.507065773010254, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.005213597323745489, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.443143844604492, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.09138711541891098, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.419620513916016, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.08430248498916626, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.360091209411621, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005488235969096422, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.010254627093672752, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00031645759008824825, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.622915267944336, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.002194090746343136, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 25.669544219970703, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.06746447831392288, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.16973876953125, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.05668458715081215, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.611255645751953, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002679024590179324, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 10.506802558898926, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005259388126432896, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.47215747833252, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005143946968019009, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.437226295471191, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06982395052909851, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.446187973022461, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.0627862885594368, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.3637146949768066, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006106208544224501, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.009373653680086136, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00021486994228325784, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.610443115234375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0017554149962961674, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 25.63470458984375, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05413088575005531, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.16493034362793, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04722768813371658, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.614177703857422, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.002250468358397484, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.479959487915039, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0038514207117259502, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.443544387817383, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.003003370249643922, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.455085754394531, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05936587601900101, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.482765197753906, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.06289776414632797, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.3570282459259033, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004071301314979792, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.013678374700248241, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00024563088663853705, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.601470947265625, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.002035983605310321, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 25.60074806213379, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.0567190907895565, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.13294792175293, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.05034729093313217, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.615615844726562, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0019135575275868177, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.453218460083008, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.002424027770757675, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.444467544555664, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.002523045288398862, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.466025352478027, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.051584143191576004, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.491785049438477, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.06395812332630157, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.3542327880859375, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0030450790654867887, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.011473423801362514, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00015824215370230377, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.592199325561523, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0020691128447651863, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 25.604280471801758, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05284876748919487, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.095787048339844, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.047348182648420334, "pnorm/_forward_module.model.norm.weight": 22.82024383544922, "gnorm/_forward_module.model.norm.weight": 0.02021314948797226, "pnorm/_forward_module.lm_head.weight": 109.1305160522461, "gnorm/_forward_module.lm_head.weight": 0.613010585308075} +{"step": 62914560, "pnorm/_forward_module.model.embeddings.weight": 104.88796997070312, "gnorm/_forward_module.model.embeddings.weight": 0.1025189757347107, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.719860076904297, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004939389415085316, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 11.128229141235352, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011005526408553123, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 11.163755416870117, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008986598812043667, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.692118644714355, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10085190087556839, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.692058563232422, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10163657367229462, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.4433810710906982, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008346364833414555, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.05477088689804077, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001424093614332378, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.73202896118164, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0054967645555734634, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.1516056060791, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08966707438230515, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.433656692504883, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08510471880435944, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.62489128112793, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.003507278859615326, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.60677719116211, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005097005981951952, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.63875961303711, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005461358465254307, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.552125930786133, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06709662079811096, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.560407638549805, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06704068183898926, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.4975993633270264, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004895268008112907, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.012294040992856026, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00029022671515122056, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.661087036132812, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.002694769762456417, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 25.98029136657715, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05590150132775307, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.379966735839844, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06475444883108139, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.602642059326172, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0020425161346793175, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 10.660996437072754, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.0035288354847580194, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 10.6280517578125, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.003947612829506397, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.521777153015137, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.041523296386003494, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.5033540725708, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03891300782561302, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.4482871294021606, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0038307325448840857, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.01765078864991665, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00025077848113141954, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.624366760253906, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001541845384053886, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 25.910226821899414, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.038379643112421036, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.356290817260742, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04128168150782585, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.592409133911133, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001665607444010675, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 10.610779762268066, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0038814737927168608, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.570647239685059, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.003478498198091984, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.495216369628906, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03607863560318947, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.514289855957031, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.030424360185861588, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.4417190551757812, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004751028958708048, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.017058830708265305, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002672166156116873, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.600055694580078, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0017562283901497722, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 25.862957000732422, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03899180144071579, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.342039108276367, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03161240369081497, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.587947845458984, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0015075609553605318, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.5874662399292, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0044016242027282715, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.546067237854004, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.004575047176331282, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.500486373901367, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.035919733345508575, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.536487579345703, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.028524480760097504, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.41974937915802, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006768654100596905, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.02287006936967373, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0004476241010706872, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.57424545288086, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0027162842452526093, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 25.807453155517578, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.05025612935423851, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.28801155090332, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03361076861619949, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.585412979125977, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0015436011599376798, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.561579704284668, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006316781509667635, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.555158615112305, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006311678793281317, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.514095306396484, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04088365659117699, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.548105239868164, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03263513371348381, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.3972113132476807, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005797999911010265, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.015622096136212349, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0002732254797592759, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.54882049560547, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.003141095396131277, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 25.795900344848633, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05698899179697037, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.22639274597168, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03376070782542229, "pnorm/_forward_module.model.norm.weight": 22.894630432128906, "gnorm/_forward_module.model.norm.weight": 0.01964632235467434, "pnorm/_forward_module.lm_head.weight": 115.21121978759766, "gnorm/_forward_module.lm_head.weight": 0.31669971346855164} +{"step": 83886080, "pnorm/_forward_module.model.embeddings.weight": 106.83600616455078, "gnorm/_forward_module.model.embeddings.weight": 0.1316344439983368, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.775516510009766, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0033321096561849117, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 11.481205940246582, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.015032563358545303, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 11.5246000289917, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010887769982218742, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.847792625427246, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08262234181165695, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.841447830200195, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08642872422933578, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.4939213991165161, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011053667403757572, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0642610415816307, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006266768323257565, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.760208129882812, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0024449783377349377, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.4133358001709, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0635531097650528, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.60830307006836, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05493597686290741, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.642797470092773, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0019923350773751736, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.704547882080078, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.014762197621166706, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.74113655090332, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.015568328090012074, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.639788627624512, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03926309943199158, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.645692825317383, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.041322268545627594, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.5617039203643799, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01914738118648529, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.013953110203146935, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008981976425275207, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.65042495727539, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012231635628268123, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.139623641967773, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.032895758748054504, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.50960350036621, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03192935511469841, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.60746192932129, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0009000562131404877, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 10.793282508850098, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005929093342274427, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 10.75982666015625, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.005741813685745001, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.574904441833496, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.021997131407260895, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.562763214111328, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.022791419178247452, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.5088341236114502, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005339370109140873, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.02315177395939827, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0003472678945399821, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.603839874267578, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.00073584308847785, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.047420501708984, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.020620718598365784, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.472381591796875, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.017804833129048347, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.586973190307617, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006952830590307713, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 10.73270320892334, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0026739079039543867, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.690000534057617, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.002474699169397354, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.533257484436035, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.019105004146695137, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.56136417388916, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.017869656905531883, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.4938703775405884, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00265257665887475, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.02260523848235607, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00013999533257447183, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.5675106048584, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0006456687697209418, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 25.9818115234375, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0169414933770895, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.438438415527344, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.01506109070032835, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.576337814331055, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0006540013127960265, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.707620620727539, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.004992831498384476, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.663607597351074, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.004952191840857267, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.53066349029541, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.017709946259856224, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.574719429016113, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.016204170882701874, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.4692668914794922, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00508389575406909, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.027870845049619675, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00021639927581418306, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.532045364379883, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0008960103150457144, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 25.9154052734375, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.020438408479094505, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.358776092529297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.01479102112352848, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.569896697998047, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007873183349147439, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.709626197814941, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.002932377392426133, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.698485374450684, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.0031206784769892693, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.543062210083008, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.021326713263988495, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.58206558227539, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.017185091972351074, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.4422718286514282, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.003336995141580701, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.021667849272489548, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00016211142065003514, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.508878707885742, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012765902793034911, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 25.91353416442871, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02539779432117939, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.286029815673828, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.013882339000701904, "pnorm/_forward_module.model.norm.weight": 22.96924591064453, "gnorm/_forward_module.model.norm.weight": 0.013659648597240448, "pnorm/_forward_module.lm_head.weight": 119.22555541992188, "gnorm/_forward_module.lm_head.weight": 0.2103087306022644} +{"step": 104857600, "pnorm/_forward_module.model.embeddings.weight": 108.76527404785156, "gnorm/_forward_module.model.embeddings.weight": 0.0988059863448143, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.824087142944336, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003368658944964409, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 11.794903755187988, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.016469884663820267, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 11.851043701171875, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.014318790286779404, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.964425086975098, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.07618066668510437, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.950562477111816, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08261243999004364, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.5321335792541504, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01662900671362877, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.07160966098308563, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009615309536457062, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.778173446655273, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0035780102480202913, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.60634994506836, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06280063837766647, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.74416160583496, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06500120460987091, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.650901794433594, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002324324334040284, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.808197021484375, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.013642110861837864, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.84225082397461, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013165615499019623, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.686192512512207, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0460541695356369, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.694801330566406, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05384279415011406, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.6083064079284668, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01458628848195076, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.018821043893694878, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006952470866963267, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.633716583251953, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001352748367935419, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.243896484375, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03713667392730713, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.607742309570312, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04014023393392563, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.60975456237793, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0012576788431033492, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 10.933894157409668, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006281932350248098, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 10.892952919006348, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.006226254627108574, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.602370262145996, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.030910717323422432, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.598128318786621, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.028728479519486427, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.557499885559082, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005386714823544025, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.031153308227658272, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000558873696718365, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.582597732543945, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007571959285996854, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.146347045898438, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.024403586983680725, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.564661026000977, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.022008227184414864, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.583702087402344, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0010362648172304034, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 10.8764009475708, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0037515603471547365, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.826032638549805, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004400154110044241, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.554410934448242, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03166214004158974, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.589902877807617, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.024834230542182922, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.5280916690826416, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006276309490203857, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.029609646648168564, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007125699776224792, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.543535232543945, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001113652135245502, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.077869415283203, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02389681339263916, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.52174949645996, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.020319726318120956, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.572967529296875, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0013868431560695171, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.812032699584961, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.006953163538128138, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.773788452148438, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.007686286699026823, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.558189392089844, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03382859751582146, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.60929012298584, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.025318192318081856, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.5158807039260864, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006155950948596001, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.03211659938097, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0004619324463419616, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.50444793701172, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0016975083854049444, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.005090713500977, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.031058358028531075, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.419004440307617, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.019784197211265564, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.568954467773438, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0016793108079582453, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.851700782775879, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008938158862292767, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.831888198852539, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.007943904027342796, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.578812599182129, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0427875779569149, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.620943069458008, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03135610371828079, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.481346607208252, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.009231198579072952, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.030236247926950455, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006436349940486252, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.48604393005371, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.003069445490837097, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.00773811340332, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05107402428984642, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.33025360107422, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.024478260427713394, "pnorm/_forward_module.model.norm.weight": 23.0690975189209, "gnorm/_forward_module.model.norm.weight": 0.011584791354835033, "pnorm/_forward_module.lm_head.weight": 122.4480972290039, "gnorm/_forward_module.lm_head.weight": 0.2163497358560562} +{"step": 125829120, "pnorm/_forward_module.model.embeddings.weight": 110.539794921875, "gnorm/_forward_module.model.embeddings.weight": 0.13281475007534027, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.868183135986328, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004224075004458427, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.056949615478516, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.015236386097967625, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.124784469604492, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.016914954409003258, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.059662818908691, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1036851555109024, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.037720680236816, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11615312099456787, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.574399471282959, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01207314245402813, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.07757940143346786, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00048717574100010097, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.784244537353516, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00437500374391675, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.73851203918457, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09157276898622513, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.84303855895996, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10378007590770721, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.65324592590332, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0030961965676397085, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.907236099243164, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010707822628319263, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 10.932843208312988, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013345225714147091, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.707816123962402, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08005566895008087, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.72025203704834, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09847377985715866, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.6529532670974731, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008171234279870987, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.02568211406469345, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006001560832373798, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.613595962524414, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001832918031141162, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.313701629638672, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.060162253677845, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.677331924438477, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06206365302205086, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.610483169555664, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0025177819188684225, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.062769889831543, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012116669677197933, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.016406059265137, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01594688557088375, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.61920166015625, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.058585118502378464, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.623127937316895, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05276278406381607, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.593552827835083, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.01483570970594883, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.040804117918014526, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0012305979616940022, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.567291259765625, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0012905660551041365, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.232860565185547, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04012874513864517, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.639114379882812, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03616517782211304, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.585166931152344, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001965865259990096, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.015167236328125, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00862634927034378, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 10.9642915725708, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011770686134696007, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.575623512268066, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.057836540043354034, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.618704795837402, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.043824005872011185, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.5611770153045654, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008931995369493961, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.03629131615161896, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007217960665002465, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.529340744018555, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0018260062206536531, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.176589965820312, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03611484915018082, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.60235595703125, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03170987591147423, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.574865341186523, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0022885026410222054, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 10.925511360168457, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.012125948444008827, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.89113998413086, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.014009195379912853, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.583314895629883, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05077369883656502, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.64307975769043, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04018859937787056, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.5575289726257324, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010789363645017147, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.03770965337753296, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.000804465205874294, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.493160247802734, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0022306276950985193, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.110389709472656, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04047847539186478, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.491744995117188, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.029377514496445656, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.578500747680664, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.003157768864184618, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 10.98249340057373, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.017340989783406258, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 10.953262329101562, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01773030497133732, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.623862266540527, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0705447718501091, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.671080589294434, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.04896058142185211, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.5343643426895142, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.026394348591566086, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.03853106498718262, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0023084457498043776, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.475759506225586, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.005168651230633259, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.098526000976562, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.07813906669616699, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.379335403442383, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.038071539252996445, "pnorm/_forward_module.model.norm.weight": 23.194360733032227, "gnorm/_forward_module.model.norm.weight": 0.011216702871024609, "pnorm/_forward_module.lm_head.weight": 125.59259033203125, "gnorm/_forward_module.lm_head.weight": 0.23988300561904907} +{"step": 146800640, "pnorm/_forward_module.model.embeddings.weight": 112.16051483154297, "gnorm/_forward_module.model.embeddings.weight": 0.17487072944641113, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.905677795410156, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0045369332656264305, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.260844230651855, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.04342787340283394, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.33402156829834, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0445626825094223, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.13553237915039, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10954815149307251, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.106432914733887, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12415492534637451, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.620123028755188, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.04885854199528694, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.08336569368839264, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0032211076468229294, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.781539916992188, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003704165341332555, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.826221466064453, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0784304141998291, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.91067123413086, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07833585143089294, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.65464973449707, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0033845240250229836, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 10.998577117919922, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03461149334907532, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.013505935668945, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.027810871601104736, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.720010757446289, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06799273192882538, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.736266136169434, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07398910075426102, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.695178508758545, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.030899381265044212, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.03349019214510918, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013029540423303843, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.594846725463867, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0016050589038059115, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.369813919067383, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04369986429810524, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.725732803344727, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04256639629602432, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.615812301635742, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0020030729938298464, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.166619300842285, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011723566800355911, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.11417293548584, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016622940078377724, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.642720222473145, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.046804070472717285, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.653688430786133, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04402254521846771, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.6359471082687378, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.016323016956448555, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.050704002380371094, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0014328381512314081, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.553266525268555, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009945271303877234, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.309879302978516, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03273516520857811, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.696025848388672, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.026959910988807678, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.59424591064453, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0013484113151207566, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.132234573364258, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00568043626844883, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.073589324951172, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.007774591911584139, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.60438346862793, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.041327282786369324, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.655449867248535, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03833989053964615, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.6075273752212524, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008202757686376572, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.043013233691453934, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0009167056996375322, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.521343231201172, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0009043567697517574, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.277976989746094, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.028673401102423668, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.67426109313965, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.025584401562809944, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.58023452758789, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0016388295916840434, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.02031421661377, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.009991725906729698, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 10.985873222351074, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.01203190442174673, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.611977577209473, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03942820429801941, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.681294441223145, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.035549141466617584, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.604661226272583, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.01585080660879612, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.043270133435726166, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0016493391012772918, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.492408752441406, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0016559226205572486, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.232797622680664, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.036421943455934525, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.57244873046875, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.026001645252108574, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.581708908081055, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.00200664927251637, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.077229499816895, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.020689914003014565, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.040599822998047, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.019657179713249207, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.658724784851074, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.050644248723983765, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.714216232299805, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03804061934351921, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.6003793478012085, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.011447093449532986, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.04623497277498245, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009182156063616276, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.4737606048584, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0047119236551225185, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.20450782775879, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.0780734196305275, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.44142723083496, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03543779253959656, "pnorm/_forward_module.model.norm.weight": 23.335927963256836, "gnorm/_forward_module.model.norm.weight": 0.01244575809687376, "pnorm/_forward_module.lm_head.weight": 128.84165954589844, "gnorm/_forward_module.lm_head.weight": 0.19579890370368958} +{"step": 167772160, "pnorm/_forward_module.model.embeddings.weight": 113.65425109863281, "gnorm/_forward_module.model.embeddings.weight": 0.3752332925796509, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.937597274780273, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.013263835571706295, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.430756568908691, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0384797640144825, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.5009126663208, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.037632327526807785, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.196552276611328, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.314021497964859, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.160433769226074, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.36097291111946106, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.6697561740875244, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.043641384690999985, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.08951397985219955, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.003036167938262224, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.777305603027344, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.008808135986328125, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.896268844604492, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.26096218824386597, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 18.962692260742188, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.3214764893054962, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.65239906311035, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.009959651157259941, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.064876556396484, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.040866605937480927, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.075906753540039, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.05842449143528938, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.722832679748535, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.24192827939987183, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.743001937866211, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.2419004589319229, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.7475247383117676, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.03196559473872185, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.04115737974643707, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0023183522280305624, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.579782485961914, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.004759897943586111, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.422643661499023, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.1375977098941803, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.764278411865234, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.1492980420589447, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.616697311401367, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.006893648765981197, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.23178768157959, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.027214152738451958, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.176909446716309, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.04267067834734917, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.666264533996582, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.18305528163909912, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.683701515197754, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.13463473320007324, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.6933085918426514, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.022443555295467377, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.05853838473558426, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0020074823405593634, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.547780990600586, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0029985280707478523, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.39693260192871, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.09887542575597763, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.75298309326172, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.10017292201519012, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.599571228027344, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.007837506011128426, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.219182968139648, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.03124256059527397, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.157421112060547, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.0766039788722992, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.633135795593262, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.16620101034641266, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.690659523010254, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.11244045197963715, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.6677882671356201, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.030966544523835182, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.04971807450056076, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0020819769706577063, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.527803421020508, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0023732015397399664, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.403688430786133, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08238833397626877, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.754724502563477, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08284381777048111, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.592710494995117, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005242627114057541, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.11439037322998, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.035145070403814316, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.07318115234375, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03883512318134308, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.648566246032715, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1228802427649498, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.72793960571289, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.0995510146021843, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.6642427444458008, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.02441808395087719, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.05036652833223343, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002336392644792795, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.501981735229492, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0025580525398254395, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.378047943115234, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.07401462644338608, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.664852142333984, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.07434221357107162, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.585594177246094, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0068656872026622295, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.165380477905273, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.06707088649272919, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.114307403564453, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.08433990180492401, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.684772491455078, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.14204658567905426, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.751830101013184, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.11026781797409058, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.6728826761245728, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.03933202847838402, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.054224275052547455, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0036527039483189583, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.475038528442383, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.00513248099014163, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.326316833496094, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.09200069308280945, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.51121711730957, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.06650051474571228, "pnorm/_forward_module.model.norm.weight": 23.490999221801758, "gnorm/_forward_module.model.norm.weight": 0.01681654341518879, "pnorm/_forward_module.lm_head.weight": 132.37454223632812, "gnorm/_forward_module.lm_head.weight": 0.23402036726474762} +{"step": 188743680, "pnorm/_forward_module.model.embeddings.weight": 115.03633117675781, "gnorm/_forward_module.model.embeddings.weight": 0.08376393467187881, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.970783233642578, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0032585039734840393, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.574190139770508, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014919369481503963, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.638427734375, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015795374289155006, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.263532638549805, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08675462752580643, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.22181510925293, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09267345815896988, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.7154415845870972, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.015888985246419907, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.09650439769029617, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0016836163122206926, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.77973747253418, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0029643329326063395, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.974180221557617, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06862007826566696, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.011850357055664, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07340862601995468, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.65519905090332, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0021133432164788246, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.122611999511719, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01301821693778038, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.132399559020996, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.014455176889896393, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.739371299743652, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0519380122423172, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.760787010192871, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05444948747754097, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.798226237297058, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008024908602237701, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.04852423444390297, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009567710221745074, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.560087203979492, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012348982272669673, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.464757919311523, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03433221951127052, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.791027069091797, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03334961086511612, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.62276268005371, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014553562505170703, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.288455963134766, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007076281122863293, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.234171867370605, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.010397701524198055, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.698945045471191, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.036375608295202255, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.72293472290039, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03278509899973869, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.7506951093673706, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007318103685975075, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.06700449436903, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006827554316259921, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.52984046936035, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0008589018252678216, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.463544845581055, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.026315895840525627, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.791440963745117, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02174973674118519, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.601211547851562, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0010722638107836246, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.289315223693848, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005969169083982706, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.22476863861084, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008934075944125652, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.66433334350586, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02896047569811344, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.7267427444458, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.025402246043086052, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.7188732624053955, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006227957550436258, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.055233605206012726, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005018361262045801, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.5206356048584, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.000763125135563314, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.506664276123047, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.023599296808242798, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.815479278564453, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.019996507093310356, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.601593017578125, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0009989457903429866, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.19720458984375, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.008404534310102463, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.150843620300293, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.01021958701312542, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.686054229736328, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.026951512321829796, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.7737455368042, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02374197356402874, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.7205321788787842, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.009674391709268093, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.057904232293367386, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.001013408531434834, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.506567001342773, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001000195275992155, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.515380859375, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.0251039769500494, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.744518280029297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.021046068519353867, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.578516006469727, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0012637685285881162, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.242677688598633, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.012781188823282719, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.179377555847168, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01338895969092846, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.701088905334473, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.029667265713214874, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.778005599975586, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02660593017935753, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.7192858457565308, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.010645592585206032, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.05948524922132492, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0008564431918784976, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.475412368774414, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0028047822415828705, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.44715690612793, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04446825385093689, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.57662582397461, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.023175247013568878, "pnorm/_forward_module.model.norm.weight": 23.63797378540039, "gnorm/_forward_module.model.norm.weight": 0.01328919269144535, "pnorm/_forward_module.lm_head.weight": 135.87295532226562, "gnorm/_forward_module.lm_head.weight": 0.15062718093395233} +{"step": 209715200, "pnorm/_forward_module.model.embeddings.weight": 116.34912109375, "gnorm/_forward_module.model.embeddings.weight": 0.13371901214122772, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.99783706665039, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005790181457996368, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.712936401367188, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01744506135582924, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.767948150634766, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.021696941927075386, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.312206268310547, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13804848492145538, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.266064643859863, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.149854376912117, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.7596806287765503, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018873820081353188, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1041477620601654, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002403165912255645, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.772520065307617, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004605800844728947, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.026714324951172, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1259504109621048, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.047922134399414, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.16037921607494354, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.65134048461914, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.005060211755335331, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.162675857543945, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.026778852567076683, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.173456192016602, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.044322166591882706, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.739363670349121, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1287258267402649, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.763998031616211, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11466874182224274, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.8608654737472534, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00974093098193407, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0569148063659668, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003519757592584938, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.543174743652344, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001980964094400406, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.507627487182617, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05981297791004181, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.81755256652832, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.06528261303901672, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.620853424072266, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0040210853330791, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.31894302368164, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.015362744219601154, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.26956844329834, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.023686038330197334, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.718331336975098, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.09621306508779526, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.749685287475586, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06048718839883804, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.816667079925537, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009321732446551323, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.07516387104988098, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008463766425848007, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.52362632751465, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0015609717229381204, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.54401969909668, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04419491067528725, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.837007522583008, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04129757359623909, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.604591369628906, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.003032674780115485, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.348023414611816, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.014145085588097572, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.279391288757324, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.026296906173229218, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.693532943725586, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.08278848975896835, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.763762474060059, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04668000340461731, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.7839628458023071, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007528932765126228, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.06183319166302681, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005908478633500636, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.530227661132812, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0016129405703395605, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.634843826293945, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.039430033415555954, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.891010284423828, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.035680368542671204, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.621212005615234, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0024913137312978506, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.283201217651367, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.012109853327274323, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.232443809509277, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.016965115442872047, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.731470108032227, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06364411115646362, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.829790115356445, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04144534096121788, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.7886402606964111, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008455894887447357, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0660754069685936, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0006458215648308396, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.5244083404541, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0018280846998095512, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.682645797729492, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03800693526864052, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.84081268310547, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03393349424004555, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.584501266479492, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0032346206717193127, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.316814422607422, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.021123290061950684, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.244068145751953, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.03018214926123619, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.72836971282959, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0732160359621048, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.817883491516113, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.042871225625276566, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.7883265018463135, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.014478277415037155, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.06651607900857925, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0014598111156374216, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.482778549194336, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.003975541330873966, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.593006134033203, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.059744756668806076, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.657867431640625, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03441070020198822, "pnorm/_forward_module.model.norm.weight": 23.80154800415039, "gnorm/_forward_module.model.norm.weight": 0.0160821545869112, "pnorm/_forward_module.lm_head.weight": 139.5150909423828, "gnorm/_forward_module.lm_head.weight": 0.15685692429542542} +{"step": 230686720, "pnorm/_forward_module.model.embeddings.weight": 117.59530639648438, "gnorm/_forward_module.model.embeddings.weight": 0.19049403071403503, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.01949119567871, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.008033240213990211, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.831002235412598, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.017128897830843925, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.8766450881958, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.019032102078199387, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.348626136779785, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.20309196412563324, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.299198150634766, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.21811527013778687, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.7971235513687134, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.013582793064415455, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.11142747104167938, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0012898566201329231, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.762617111206055, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.005687080789357424, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.064123153686523, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.1755734533071518, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.074607849121094, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.23553292453289032, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.64662742614746, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.008592969737946987, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.206832885742188, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.03197294473648071, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.2162504196167, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.06188829988241196, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.734309196472168, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1994755119085312, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.76162052154541, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.18193203210830688, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.913301706314087, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01388774998486042, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0636541023850441, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006036779377609491, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.534732818603516, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0025495989248156548, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.55596351623535, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.09598905593156815, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.845869064331055, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.10054946690797806, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.617097854614258, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.005318331066519022, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.345049858093262, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.023664621636271477, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.298416137695312, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.03715529292821884, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.732030868530273, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.15770231187343597, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.771069526672363, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.10204190760850906, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.881794810295105, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.013457235880196095, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.08174212276935577, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008635143167339265, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.527463912963867, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.002073934068903327, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.63053321838379, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0732022374868393, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.88658332824707, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.07394726574420929, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.61311912536621, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0049414727836847305, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.419389724731445, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.02139521948993206, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.339095115661621, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.03744104132056236, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.720309257507324, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.14235864579677582, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.799025535583496, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.07976144552230835, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.850890874862671, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.011397928930819035, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.06791403144598007, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0009546764777041972, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.546815872192383, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002513659419491887, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.769325256347656, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0722820833325386, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 18.969547271728516, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06860664486885071, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.643346786499023, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.00604212936013937, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.371956825256348, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.026979347690939903, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.309248924255371, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.043459463864564896, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.773518562316895, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.14914603531360626, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.884276390075684, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.0711166113615036, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.8562583923339844, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.013241935521364212, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.07341331988573074, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0008141965372487903, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.547651290893555, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0031956692691892385, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 26.855274200439453, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.06870071589946747, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 18.939620971679688, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.062133338302373886, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.596630096435547, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0060209245420992374, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.39108943939209, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0404476672410965, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.311890602111816, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.054257411509752274, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.751691818237305, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.12477506697177887, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.854666709899902, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.06664307415485382, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.8716579675674438, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.021433386951684952, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.07459148019552231, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0022607718128710985, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.49197769165039, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.004628044553101063, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.73834800720215, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.08054588735103607, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.737686157226562, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.05005943775177002, "pnorm/_forward_module.model.norm.weight": 23.97566032409668, "gnorm/_forward_module.model.norm.weight": 0.012181926518678665, "pnorm/_forward_module.lm_head.weight": 143.2727813720703, "gnorm/_forward_module.lm_head.weight": 0.24428319931030273} +{"step": 251658240, "pnorm/_forward_module.model.embeddings.weight": 118.77630615234375, "gnorm/_forward_module.model.embeddings.weight": 0.09104131907224655, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.043256759643555, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003967117518186569, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 12.93606185913086, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.016746236011385918, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 12.97180461883545, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017688849940896034, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.389973640441895, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10109597444534302, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.337420463562012, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10743393748998642, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.8331917524337769, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018047207966446877, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1185770109295845, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009192582801915705, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.757070541381836, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023324466310441494, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.108543395996094, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06591136753559113, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.101581573486328, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07353788614273071, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.648448944091797, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0027899721171706915, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.257966041564941, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.012535429559648037, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.26070785522461, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.017693717032670975, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.737060546875, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06864340603351593, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.765826225280762, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0697237104177475, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 1.9694334268569946, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008365937508642673, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.07080504298210144, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006330774631351233, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.524751663208008, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0016419473104178905, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.600032806396484, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04205929487943649, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.870288848876953, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.035878922790288925, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.621288299560547, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017433192115277052, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.387290000915527, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009558811783790588, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.3395414352417, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013158700428903103, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.749876022338867, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.04824104160070419, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.794900894165039, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.043127186596393585, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 1.9576321840286255, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00603956775739789, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.08955115079879761, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005996979307383299, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.52739906311035, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0011436814675107598, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.706390380859375, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03283266723155975, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.929319381713867, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.025952156633138657, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.62242317199707, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0015175408916547894, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.487507820129395, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008609105832874775, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.397679328918457, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011436790227890015, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.744427680969238, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.041046444326639175, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.830910682678223, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03392907977104187, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.9233272075653076, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006664702668786049, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0749426931142807, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005981608992442489, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.56074333190918, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0010038301115855575, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 26.892406463623047, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03043271228671074, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.039464950561523, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.024798719212412834, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.662601470947266, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0013744536554440856, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.459198951721191, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.011094560846686363, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.382780075073242, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.013025806285440922, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.809196472167969, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.0368102490901947, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.930768966674805, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03257548809051514, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.919845461845398, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007494404446333647, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0807492583990097, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.000537809741217643, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.567773818969727, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001170991687104106, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.014596939086914, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.031339697539806366, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.030467987060547, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.027022453024983406, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.597213745117188, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001864320831373334, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.46182918548584, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.013967448845505714, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.377432823181152, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.016739584505558014, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.763339042663574, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.042556922882795334, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.87402629852295, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.029189875349402428, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 1.947001338005066, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005487373564392328, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0821760818362236, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.000577339029405266, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.502880096435547, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0028949470724910498, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 26.88933563232422, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.048810314387083054, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.822097778320312, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02678196132183075, "pnorm/_forward_module.model.norm.weight": 24.14000129699707, "gnorm/_forward_module.model.norm.weight": 0.014620570465922356, "pnorm/_forward_module.lm_head.weight": 146.97328186035156, "gnorm/_forward_module.lm_head.weight": 0.11145640164613724} +{"step": 272629760, "pnorm/_forward_module.model.embeddings.weight": 119.91284942626953, "gnorm/_forward_module.model.embeddings.weight": 0.10889001935720444, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.057653427124023, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004217384848743677, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.033028602600098, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013483759947121143, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.059002876281738, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017534516751766205, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.412915229797363, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13076737523078918, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.358333587646484, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13625681400299072, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.8598153591156006, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.013878464698791504, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1263749897480011, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0015820229891687632, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.743017196655273, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004496718756854534, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.132381439208984, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09118454903364182, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.119535446166992, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11226189881563187, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.647310256958008, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004121502861380577, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.306846618652344, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01542238611727953, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.302882194519043, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.02447574958205223, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.731199264526367, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.10719011723995209, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.762475967407227, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09258536994457245, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.022031545639038, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010991967283189297, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.07788518816232681, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004613384953700006, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.514833450317383, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0018790309550240636, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.643281936645508, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.050080325454473495, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.893898010253906, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.044430848211050034, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.62422752380371, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0022747351322323084, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.427118301391602, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010895579122006893, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.376411437988281, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01489811297506094, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.763297080993652, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0740971788764, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.816730499267578, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04862150549888611, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.030853509902954, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006349043920636177, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.09594131261110306, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006057805730961263, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53068733215332, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001366862328723073, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.78592872619629, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04131748899817467, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 18.973661422729492, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.033244188874959946, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.636764526367188, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0019422370241954923, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.560235023498535, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.010788674466311932, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.458044052124023, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.014126082882285118, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.770689010620117, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06462884694337845, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.868244171142578, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.0433826819062233, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 1.9914355278015137, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007485238369554281, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.08127256482839584, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006745359860360622, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.577972412109375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0015585002256557345, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.02077865600586, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.041612815111875534, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.112918853759766, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.032181914895772934, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.69171905517578, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0024419575929641724, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.555557250976562, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02050498127937317, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.464851379394531, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.02218904159963131, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.852001190185547, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06302734464406967, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 10.989665031433105, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.039792250841856, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 1.973231554031372, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.01414131373167038, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.08629895746707916, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014770098496228456, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.59722328186035, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0017882657703012228, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.189908981323242, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04029306769371033, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.131345748901367, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.0316234789788723, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.60917854309082, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002278014784678817, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.54100513458252, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.021365994587540627, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.449332237243652, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.028771869838237762, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.786969184875488, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.056546103209257126, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.91482162475586, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03543837368488312, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.005755662918091, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.010855098254978657, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.08846894651651382, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001047951402142644, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.513490676879883, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.002303200075402856, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.039731979370117, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.043763983994722366, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.904651641845703, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.027595791965723038, "pnorm/_forward_module.model.norm.weight": 24.317291259765625, "gnorm/_forward_module.model.norm.weight": 0.01278015412390232, "pnorm/_forward_module.lm_head.weight": 150.7801513671875, "gnorm/_forward_module.lm_head.weight": 0.10523701459169388} +{"step": 293601280, "pnorm/_forward_module.model.embeddings.weight": 120.99600982666016, "gnorm/_forward_module.model.embeddings.weight": 0.11147966235876083, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.070520401000977, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005462083965539932, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.113507270812988, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013726666569709778, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.130859375, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017138920724391937, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.43447494506836, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13642817735671997, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.37843132019043, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14352861046791077, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.8813782930374146, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01240807119756937, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.13357405364513397, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.002736865309998393, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.732301712036133, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0028684858698397875, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.15867805480957, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.09181730449199677, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.137418746948242, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.11017195135354996, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.647695541381836, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.004199353978037834, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.361574172973633, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.018798930570483208, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.348026275634766, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.030113019049167633, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.726417541503906, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.10414307564496994, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.759902954101562, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.1031198501586914, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.057053804397583, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009412898682057858, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.08254634588956833, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008818696951493621, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.507240295410156, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001957631204277277, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.685997009277344, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06048803776502609, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.91669273376465, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.05287410318851471, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.63071060180664, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002713034860789776, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.480287551879883, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.013688579201698303, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.4203519821167, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.024272989481687546, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.774970054626465, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0804024413228035, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.835330963134766, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06201629713177681, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.095649242401123, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006920207291841507, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.10218387842178345, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005676428554579616, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.537717819213867, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0013855933211743832, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.866369247436523, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.046325843781232834, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.019298553466797, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03845065459609032, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.6495361328125, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002471981104463339, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.630498886108398, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.013287698850035667, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.515692710876465, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.02102171629667282, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.791457176208496, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0728471428155899, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.89852237701416, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.0533377043902874, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.052717924118042, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006540372502058744, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.08681830018758774, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005456437938846648, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.596288681030273, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0013716727262362838, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.14311981201172, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.047228723764419556, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.18341827392578, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03883901238441467, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.719341278076172, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0030395379289984703, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.653237342834473, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02125188335776329, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.547159194946289, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.032916925847530365, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.885393142700195, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07673543691635132, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.037700653076172, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.05071266368031502, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.0186901092529297, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014704009518027306, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0911145731806755, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0016136798076331615, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.623960494995117, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0017263386398553848, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.35148048400879, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.05246324837207794, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.22401237487793, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.04170777648687363, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.620813369750977, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002819473622366786, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.621431350708008, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.025993449613451958, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.522273063659668, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.036378324031829834, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.805458068847656, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06723666936159134, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.94849681854248, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.04792013764381409, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.0576281547546387, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015223890542984009, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.094670869410038, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0016411906108260155, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.52789306640625, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.004479128867387772, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.193988800048828, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.07742790877819061, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 18.993030548095703, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04462972283363342, "pnorm/_forward_module.model.norm.weight": 24.4943790435791, "gnorm/_forward_module.model.norm.weight": 0.015598480589687824, "pnorm/_forward_module.lm_head.weight": 154.57345581054688, "gnorm/_forward_module.lm_head.weight": 0.1563960164785385} +{"step": 314572800, "pnorm/_forward_module.model.embeddings.weight": 122.03596496582031, "gnorm/_forward_module.model.embeddings.weight": 0.09771708399057388, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.07990837097168, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004299760330468416, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.18920612335205, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012488343752920628, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.19742488861084, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015127616003155708, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.449588775634766, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10629958659410477, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.392823219299316, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11635883897542953, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9008351564407349, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010763601399958134, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1404450237751007, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007686673779971898, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.720945358276367, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023362282663583755, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.183303833007812, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07568563520908356, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.154064178466797, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0858607292175293, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.647563934326172, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0033518148120492697, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.407238960266113, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.013268765062093735, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.385930061340332, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.021018048748373985, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.720319747924805, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08972633630037308, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.756085395812988, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07212679088115692, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.0994181632995605, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007395288906991482, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.08930277824401855, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005197248538024724, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.501869201660156, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001387468772009015, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.732139587402344, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04439787566661835, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.94086456298828, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03961736336350441, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.63694953918457, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0021608914248645306, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.536081314086914, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010279212146997452, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.46448802947998, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015968598425388336, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.783308029174805, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.060639046132564545, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.850088119506836, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.043926484882831573, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.1546823978424072, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007969115860760212, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.10765751451253891, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005742908688262105, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.543148040771484, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001024339348077774, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 26.942567825317383, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.036548733711242676, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.063627243041992, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02992447279393673, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.66213607788086, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017592781223356724, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.69373893737793, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009843999519944191, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.566996574401855, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013545127585530281, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.809877395629883, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05416324734687805, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.925602912902832, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.036741457879543304, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.1151516437530518, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009200768545269966, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.09237707406282425, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0009994391584768891, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.616409301757812, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0009982774499803782, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.26201629638672, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03482495993375778, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.252370834350586, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.028577987104654312, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.758689880371094, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0020569968037307262, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.756866455078125, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.01428201049566269, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.628239631652832, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.026935212314128876, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.923151016235352, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.04641883820295334, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.093290328979492, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03354502469301224, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.07488751411438, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010041143745183945, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.09710695594549179, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0012082296889275312, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.656204223632812, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010768487118184566, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.51453971862793, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.032654475420713425, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.318321228027344, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02869519218802452, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.636314392089844, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0020829124841839075, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.702534675598145, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.01811295561492443, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.593524932861328, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.025772061198949814, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.822979927062988, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04568447917699814, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 10.98041820526123, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.029198022559285164, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.1227545738220215, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.00972615834325552, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.10187438875436783, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0010930602438747883, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.540971755981445, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0022629289887845516, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.341197967529297, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04361443594098091, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.073518753051758, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.027927054092288017, "pnorm/_forward_module.model.norm.weight": 24.668575286865234, "gnorm/_forward_module.model.norm.weight": 0.011973658576607704, "pnorm/_forward_module.lm_head.weight": 158.26121520996094, "gnorm/_forward_module.lm_head.weight": 0.11205610632896423} +{"step": 335544320, "pnorm/_forward_module.model.embeddings.weight": 123.0317153930664, "gnorm/_forward_module.model.embeddings.weight": 0.11661148816347122, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.08358383178711, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004108330700546503, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.253385543823242, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014832854270935059, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.25259017944336, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01756509765982628, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.45580005645752, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11946862190961838, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.398685455322266, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13505417108535767, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9144059419631958, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.018491661176085472, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.14765243232250214, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0011536992387846112, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.706457138061523, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0024992553517222404, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.198484420776367, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08869421482086182, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.166118621826172, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10052657127380371, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.647356033325195, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0034114900045096874, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.45528793334961, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.015373595058918, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.424327850341797, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.021816883236169815, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.713380813598633, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1040230467915535, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.752172470092773, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09149830043315887, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1300063133239746, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012305195443332195, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.09466448426246643, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008743847720324993, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.499984741210938, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013437627349048853, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.78034210205078, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.053050097078084946, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.968019485473633, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04848535358905792, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.643774032592773, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0030816011130809784, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.590971946716309, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.013264995068311691, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.507065773010254, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02112061157822609, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.791428565979004, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.08815094828605652, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.86434555053711, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.057391565293073654, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.204148054122925, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009049582295119762, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.11255798488855362, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00043346991878934205, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.551950454711914, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014432207681238651, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.02067756652832, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0458691343665123, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.109678268432617, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03791375458240509, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.673358917236328, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0025880825705826283, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.752862930297852, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.012410185299813747, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.61631965637207, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.019679062068462372, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.826204299926758, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0764010101556778, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.950239181518555, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04693560674786568, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.1684327125549316, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.010842060670256615, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.09722232818603516, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008347769035026431, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.63558578491211, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014482917031273246, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.37534523010254, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.046487435698509216, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.31822395324707, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03788452968001366, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.804636001586914, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0029504697304219007, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.864716529846191, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.020435545593500137, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.709628105163574, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03519668057560921, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.960811614990234, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07568573206663132, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.14995288848877, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04125481843948364, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.124318838119507, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.012867894023656845, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.10214037448167801, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0015190762933343649, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.690589904785156, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0018807696178555489, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.672582626342773, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04699908569455147, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.410154342651367, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03901050612330437, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.652408599853516, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0029035035986453295, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.785618782043457, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.020871706306934357, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.664782524108887, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.025180019438266754, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.839564323425293, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06871379911899567, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.010252952575684, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03877045586705208, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.177987575531006, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013871962204575539, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.10800457000732422, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0015760917449370027, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.55708885192871, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0027401261031627655, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.49110984802246, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05128858610987663, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.157978057861328, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03378387168049812, "pnorm/_forward_module.model.norm.weight": 24.843355178833008, "gnorm/_forward_module.model.norm.weight": 0.012321566231548786, "pnorm/_forward_module.lm_head.weight": 161.8473358154297, "gnorm/_forward_module.lm_head.weight": 0.12996439635753632} +{"step": 356515840, "pnorm/_forward_module.model.embeddings.weight": 123.98330688476562, "gnorm/_forward_module.model.embeddings.weight": 0.10730889439582825, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.08510398864746, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004508263431489468, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.31334400177002, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014395227655768394, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.304265022277832, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017157213762402534, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.458649635314941, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12883594632148743, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.401488304138184, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14043456315994263, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9236663579940796, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011593151837587357, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.15469171106815338, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007897707400843501, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.691715240478516, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0031417664140462875, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.211524963378906, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08846230804920197, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.176698684692383, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09974571317434311, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.645889282226562, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.003687808057293296, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.498858451843262, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.012657041661441326, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.458312034606934, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0191914364695549, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.705687522888184, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09943155944347382, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.746960639953613, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08730655163526535, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.158503770828247, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00887899100780487, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.09998590499162674, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004716853436548263, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.49564552307129, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015040352009236813, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.823041915893555, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05234513804316521, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 18.991851806640625, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04561625048518181, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.650527954101562, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00248261378146708, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.644381523132324, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012047269381582737, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.547119140625, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018545400351285934, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.79857063293457, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07257091253995895, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.876603126525879, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05391809716820717, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.251948118209839, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008329235017299652, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.11731315404176712, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005663410993292928, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.560457229614258, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001202534418553114, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.09560203552246, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04238276183605194, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.154769897460938, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.035078421235084534, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.685503005981445, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.00201427168212831, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.812843322753906, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.011553647927939892, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.665576934814453, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01501484215259552, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.84318733215332, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06301015615463257, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 10.975397109985352, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.045525092631578445, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.2159578800201416, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008298151195049286, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.10203133523464203, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006281682872213423, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.654605865478516, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001439082552678883, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.48304557800293, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.043323762714862823, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.3809757232666, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.035409312695264816, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.856101989746094, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003030424239113927, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 11.975990295410156, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.022105321288108826, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.79249095916748, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03977242112159729, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 10.998800277709961, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06701275706291199, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.2079496383667, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04137362912297249, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.1760923862457275, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.015276911668479443, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.10715842992067337, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.001990947872400284, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.727018356323242, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0015610696282237768, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.827381134033203, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04062538221478462, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.499956130981445, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.036933645606040955, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.671510696411133, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002845051698386669, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.875776290893555, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.029735036194324493, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.742036819458008, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.035470038652420044, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.857763290405273, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06154875084757805, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.04102611541748, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03733653575181961, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.233957529067993, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015533343888819218, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1143498420715332, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0013977715279906988, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.574983596801758, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0022534248419106007, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.639223098754883, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04701949656009674, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.2421875, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.032660916447639465, "pnorm/_forward_module.model.norm.weight": 25.01641082763672, "gnorm/_forward_module.model.norm.weight": 0.010202317498624325, "pnorm/_forward_module.lm_head.weight": 165.26121520996094, "gnorm/_forward_module.lm_head.weight": 0.12439104169607162} +{"step": 377487360, "pnorm/_forward_module.model.embeddings.weight": 124.89445495605469, "gnorm/_forward_module.model.embeddings.weight": 0.0783911645412445, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.081172943115234, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003394216997548938, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.362104415893555, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009574750438332558, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.345293045043945, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010895627550780773, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.454590797424316, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10197697579860687, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.397655487060547, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10649345815181732, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9312808513641357, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006689104717224836, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1623755544424057, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007538440986536443, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.674407958984375, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0020088849123567343, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.21968650817871, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06694821268320084, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.185239791870117, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0760410875082016, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.643272399902344, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002992226742208004, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.536368370056152, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01218761783093214, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.486722946166992, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.020178502425551414, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.69869613647461, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08252700418233871, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.742256164550781, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07019130140542984, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1754813194274902, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008499414660036564, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1046927273273468, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009231276926584542, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.49241065979004, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013545402325689793, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.86638832092285, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.040555838495492935, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.017414093017578, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.034779105335474014, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.655832290649414, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00183225201908499, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.697392463684082, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008640754967927933, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.58662223815918, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.014242188073694706, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.803635597229004, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.058095432817935944, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.88759708404541, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04346470162272453, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.297619104385376, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005888940766453743, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.12264548987150192, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00048669864190742373, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.569225311279297, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0010000746697187424, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.17021942138672, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03464248403906822, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.200864791870117, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.027864953503012657, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.698333740234375, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001609499566257, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.872076034545898, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009497537277638912, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.713635444641113, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01233475748449564, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.861428260803223, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05158692225813866, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.003145217895508, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03735635429620743, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.26071834564209, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0071451980620622635, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.10706187784671783, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008418260258622468, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.671621322631836, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0009782410925254226, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.585403442382812, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.034832727164030075, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.44188117980957, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.028255803510546684, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.918428421020508, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0023360333871096373, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.095951080322266, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.017280058935284615, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.878045082092285, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03010326251387596, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.040064811706543, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05807463452219963, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.272201538085938, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.034780412912368774, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.231570243835449, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00816722959280014, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1126532331109047, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0007694617379456758, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.76466178894043, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0012303570983931422, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 27.978649139404297, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.035187214612960815, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.588115692138672, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.028927108272910118, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.693622589111328, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002000664360821247, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 11.964360237121582, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014818428084254265, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.815727233886719, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.0197223499417305, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.879727363586426, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05328282341361046, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.079030990600586, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03123931586742401, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.2867488861083984, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007295742630958557, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.12066281586885452, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0007664074073545635, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.59544563293457, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0029928458388894796, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.785655975341797, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.055556103587150574, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.329317092895508, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03669915348291397, "pnorm/_forward_module.model.norm.weight": 25.191997528076172, "gnorm/_forward_module.model.norm.weight": 0.013388078659772873, "pnorm/_forward_module.lm_head.weight": 168.49778747558594, "gnorm/_forward_module.lm_head.weight": 0.10367895662784576} +{"step": 398458880, "pnorm/_forward_module.model.embeddings.weight": 125.76701354980469, "gnorm/_forward_module.model.embeddings.weight": 0.10229793190956116, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.077865600585938, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004486253950744867, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.412444114685059, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011220389977097511, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.387136459350586, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012700512073934078, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.451393127441406, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13266228139400482, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.394583702087402, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13834714889526367, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9375767707824707, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009291688911616802, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.1703713834285736, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005065248114988208, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.65518569946289, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002534394385293126, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.225217819213867, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08674588799476624, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.192516326904297, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09443842619657516, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.638912200927734, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0034320892300456762, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.569879531860352, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.014047432690858841, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.513006210327148, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.02414454147219658, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.69328784942627, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.10381689667701721, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.739217758178711, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08664577454328537, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1869494915008545, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013242070563137531, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.10920026898384094, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014650889206677675, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.488483428955078, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013829271774739027, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.90978240966797, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0496591217815876, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.04371452331543, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04333323612809181, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66161346435547, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0023446278646588326, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.755073547363281, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012665360234677792, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.628942489624023, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018958792090415955, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.808263778686523, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07735806703567505, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.89801025390625, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05314937233924866, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.3372395038604736, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00977968517690897, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1274646669626236, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007879264885559678, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.577993392944336, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0012200692435726523, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.244123458862305, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.042551565915346146, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.247478485107422, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.034735891968011856, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.710710525512695, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002170894993469119, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 11.93533706665039, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.012660070322453976, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.764045715332031, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.017846597358584404, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.88010311126709, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06823495775461197, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.031176567077637, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.044544193893671036, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.296987533569336, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.010384132154285908, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.11125534772872925, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0012625920353457332, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.685447692871094, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0012402565917000175, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.6811580657959, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04138964042067528, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.500568389892578, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03405993431806564, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 22.985971450805664, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0030886614695191383, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.221569061279297, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.018554937094449997, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 11.967947959899902, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.035285770893096924, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.081694602966309, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06854552030563354, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.33785629272461, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04105832427740097, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.281315326690674, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.009273367933928967, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.11736711114645004, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0009778881212696433, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.803176879882812, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001476565725170076, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.127840042114258, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03970320522785187, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.674652099609375, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.034778691828250885, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.712421417236328, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0022951026912778616, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.048412322998047, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014200737699866295, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.88521957397461, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.024841193109750748, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.899712562561035, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05760574713349342, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.113861083984375, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03478769212961197, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.327662706375122, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.008615827187895775, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.12528900802135468, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001023236894980073, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.614728927612305, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.001988058676943183, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 27.932491302490234, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.0440484881401062, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.411945343017578, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.034004904329776764, "pnorm/_forward_module.model.norm.weight": 25.362735748291016, "gnorm/_forward_module.model.norm.weight": 0.009785422123968601, "pnorm/_forward_module.lm_head.weight": 171.56622314453125, "gnorm/_forward_module.lm_head.weight": 0.11838195472955704} +{"step": 419430400, "pnorm/_forward_module.model.embeddings.weight": 126.6026382446289, "gnorm/_forward_module.model.embeddings.weight": 0.13300304114818573, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.070653915405273, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.006131074391305447, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.460570335388184, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.014509015716612339, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.427083969116211, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017392786219716072, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.442947387695312, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.17182698845863342, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.386990547180176, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.17770014703273773, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9418224096298218, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.014241264201700687, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.17787866294384003, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0015122892800718546, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.631864547729492, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.004065715242177248, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.22486686706543, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.11835425347089767, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.197128295898438, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.1331779658794403, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.631771087646484, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.005674952641129494, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.599457740783691, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.018233221024274826, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.536073684692383, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.030067410320043564, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.687495231628418, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.15131744742393494, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.736363410949707, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.11323127895593643, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.192054510116577, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015085827559232712, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11287225782871246, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007716157706454396, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.48261260986328, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0017733098939061165, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.951976776123047, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06397969275712967, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.070005416870117, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.057329267263412476, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.667369842529297, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.003432836849242449, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.816295623779297, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.014523073099553585, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.673852920532227, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.020957399159669876, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.812700271606445, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.11097199469804764, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.90843391418457, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06701502948999405, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.3743228912353516, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010087914764881134, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.13210712373256683, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008170761866495013, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.583576202392578, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0017507713055238128, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.313764572143555, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05828706920146942, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.292850494384766, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04833988845348358, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.72382354736328, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0032901419326663017, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.003243446350098, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.015179264359176159, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.817774772644043, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.022486506029963493, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.89942741394043, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.10027962177991867, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.060054779052734, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.05650703236460686, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.3308398723602295, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.013345101848244667, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1152004599571228, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0017104634316638112, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.695497512817383, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0015815087826922536, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.77082633972168, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.056871239095926285, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.5561466217041, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04732450842857361, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.061893463134766, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.004773326218128204, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.351630210876465, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02523997239768505, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.058046340942383, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04637295752763748, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.127816200256348, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11313251405954361, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.412273406982422, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04920203983783722, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.3231849670410156, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.009759616106748581, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.12134481966495514, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0009079553419724107, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.843002319335938, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001707739313133061, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.27666664123535, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04993486404418945, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.761449813842773, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.044634658843278885, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.73823356628418, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.003006188664585352, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.140711784362793, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.017246492207050323, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 11.96092700958252, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.024104928597807884, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.924047470092773, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.08378183841705322, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.156548500061035, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.039526697248220444, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.377810001373291, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.00859067589044571, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1305346041917801, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009147594100795686, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.63437843322754, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.00240719155408442, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.077661514282227, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04892037808895111, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.494508743286133, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04002435877919197, "pnorm/_forward_module.model.norm.weight": 25.534820556640625, "gnorm/_forward_module.model.norm.weight": 0.012046243995428085, "pnorm/_forward_module.lm_head.weight": 174.47003173828125, "gnorm/_forward_module.lm_head.weight": 0.10241372138261795} +{"step": 440401920, "pnorm/_forward_module.model.embeddings.weight": 127.4016342163086, "gnorm/_forward_module.model.embeddings.weight": 0.11076513677835464, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.06170654296875, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004029294941574335, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.513799667358398, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01381214614957571, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.47204875946045, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.020139018073678017, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.430787086486816, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12355135381221771, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.376423835754395, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12641681730747223, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.944541096687317, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.013009830377995968, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.18556486070156097, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010258110705763102, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.605783462524414, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0022881680633872747, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.22022247314453, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07767193764448166, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.19944953918457, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07856135070323944, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.621179580688477, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0032384961377829313, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.618587493896484, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.013946810737252235, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.550588607788086, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.02028597705066204, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.681197166442871, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08873898535966873, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.732852935791016, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07521510124206543, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.193178653717041, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013630231842398643, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11632797867059708, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014285554643720388, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.475997924804688, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012849320191890001, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 26.992389678955078, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04595048353075981, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.09649085998535, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03734510391950607, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.672325134277344, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0019351942464709282, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.873587608337402, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008784042671322823, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.715360641479492, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01311560533940792, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.817234992980957, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06578682363033295, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.91893482208252, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04760182276368141, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.417750120162964, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007681317627429962, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.13722524046897888, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00038758653681725264, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.58790397644043, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0012469823705032468, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.381587982177734, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04162292182445526, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.337039947509766, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03244561329483986, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.737201690673828, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.00197144434787333, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.072874069213867, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.010059165768325329, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.871269226074219, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013795804232358932, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.92042064666748, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.062404632568359375, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.091043472290039, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04027608036994934, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.367361545562744, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.01099981926381588, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.11999338865280151, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0011717057786881924, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.699188232421875, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0012494259281083941, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.850879669189453, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04131341725587845, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.605276107788086, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.0316472090780735, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.14288330078125, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0034407766070216894, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.490116119384766, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.01755926012992859, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.150766372680664, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03629198670387268, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.178740501403809, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06111367791891098, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.495649337768555, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.036341749131679535, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.366691827774048, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010901357978582382, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.12553489208221436, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014835481997579336, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.879703521728516, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001212718547321856, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.418354034423828, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03613821789622307, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.844619750976562, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.030756203457713127, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.763370513916016, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0018267427803948522, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.233549118041992, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.013278229162096977, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.036519050598145, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.018160704523324966, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.94697093963623, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05144893378019333, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.197552680969238, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.028207333758473396, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.4238955974578857, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.012628191150724888, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.13559430837631226, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0014414290199056268, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.65908432006836, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0024690988939255476, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.22845458984375, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04656601324677467, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.58501434326172, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.0319368951022625, "pnorm/_forward_module.model.norm.weight": 25.70808219909668, "gnorm/_forward_module.model.norm.weight": 0.008805469609797001, "pnorm/_forward_module.lm_head.weight": 177.2296905517578, "gnorm/_forward_module.lm_head.weight": 0.07623133063316345} +{"step": 461373440, "pnorm/_forward_module.model.embeddings.weight": 128.1724090576172, "gnorm/_forward_module.model.embeddings.weight": 0.12523862719535828, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.048799514770508, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.005390833131968975, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.566010475158691, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.013237104751169682, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.516889572143555, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.017227182164788246, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.412449836730957, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1670149713754654, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.360329627990723, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.17639660835266113, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9455666542053223, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.012661349959671497, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.19395820796489716, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010538218775764108, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.576231002807617, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.003232979215681553, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.208864212036133, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.10796771198511124, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.198638916015625, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.10780072212219238, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.606977462768555, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.003987675998359919, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.636107444763184, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.01595599576830864, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.565070152282715, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.02553144469857216, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.67153263092041, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1292364001274109, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.726133346557617, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.10312586277723312, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1872410774230957, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015089266933500767, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11862709373235703, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000948338070884347, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.46748924255371, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0019612854812294245, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.030229568481445, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06542326509952545, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.122385025024414, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0532471165060997, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67634391784668, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0027894650120288134, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.931078910827637, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.013544655404984951, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.756717681884766, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.024341100826859474, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.820140838623047, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.09871938079595566, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.927481651306152, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06956463307142258, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.456946611404419, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010382549837231636, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.14165529608726501, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006981990300118923, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.586442947387695, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016269857296720147, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.440488815307617, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0586865171790123, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.376224517822266, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0471416637301445, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.752363204956055, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002664086641743779, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.146353721618652, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.014614103361964226, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.928153991699219, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.018540602177381516, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.942986488342285, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.08965379744768143, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.123725891113281, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.06263314932584763, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.399639129638672, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.011962451972067356, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.12401176989078522, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0011897742515429854, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.69557762145996, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002032896736636758, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.91726303100586, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06059258058667183, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.64629554748535, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.048397861421108246, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.23037338256836, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005058838985860348, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.62539291381836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.024377651512622833, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.237906455993652, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05449729785323143, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.237589836120605, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11371665447950363, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.592327117919922, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.05512974411249161, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.407928228378296, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.013631529174745083, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.12940861284732819, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014630239456892014, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.917041778564453, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0019719020929187536, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.559476852416992, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.056435856968164444, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 19.92876434326172, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.04769906401634216, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.788619995117188, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.003709380514919758, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.321540832519531, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.022254955023527145, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.107276916503906, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.03400900959968567, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.973063468933105, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.10076599568128586, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.243049621582031, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.04722192510962486, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.4675943851470947, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.016595885157585144, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.13980677723884583, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.002076784148812294, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.68401336669922, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.004418401978909969, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.377302169799805, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.08198182284832001, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.675207138061523, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.05650278553366661, "pnorm/_forward_module.model.norm.weight": 25.880910873413086, "gnorm/_forward_module.model.norm.weight": 0.010104636661708355, "pnorm/_forward_module.lm_head.weight": 179.88380432128906, "gnorm/_forward_module.lm_head.weight": 0.1825292706489563} +{"step": 482344960, "pnorm/_forward_module.model.embeddings.weight": 128.91761779785156, "gnorm/_forward_module.model.embeddings.weight": 0.07941078394651413, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.038623809814453, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003282698802649975, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.628076553344727, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009418168105185032, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.569841384887695, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.011454613879323006, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.396394729614258, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10145469009876251, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.34578800201416, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10773970931768417, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9501638412475586, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00759013881906867, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.20295970141887665, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00042762921657413244, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.5490779876709, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0020800193306058645, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.202030181884766, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06346683204174042, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.199390411376953, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06650038063526154, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.58868980407715, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0025066910311579704, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.647156715393066, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010268224403262138, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.574130058288574, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0156781654804945, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.659266471862793, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07625259459018707, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.716650009155273, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07127705216407776, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.178622245788574, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012812024913728237, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.11984903365373611, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013509339187294245, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.45794105529785, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010739907156676054, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.065784454345703, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.041742466390132904, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.146100997924805, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03322777897119522, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.678531646728516, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017709573730826378, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 11.986621856689453, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008668308146297932, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.795585632324219, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.012895965948700905, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.822261810302734, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.060405004769563675, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.935050964355469, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0466947928071022, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.490936517715454, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008430423215031624, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.14567789435386658, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006683855899609625, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.58112907409668, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001057197107002139, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.492250442504883, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03901446610689163, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.410844802856445, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0297229066491127, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.76519012451172, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.00165904953610152, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.221142768859863, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008766558952629566, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 11.986505508422852, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011165237985551357, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.961343765258789, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.051882535219192505, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.150932312011719, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04091638699173927, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.424013137817383, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009269880130887032, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.12714214622974396, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008291368139907718, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.686769485473633, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0012920401059091091, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.973365783691406, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04078851267695427, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.682048797607422, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.029162226244807243, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.315710067749023, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003245763713493943, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.761555671691895, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.01830100454390049, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.32479476928711, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.043573927134275436, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.29765510559082, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06341782957315445, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.695272445678711, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03413818031549454, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.4398341178894043, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0077997418120503426, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.13206131756305695, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0009214167948812246, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.952688217163086, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001108091906644404, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.69431495666504, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03169718012213707, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.00983238220215, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02624671533703804, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.81229591369629, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0012663041707128286, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.409306526184082, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008709406480193138, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.176384925842285, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.012837238609790802, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 10.99650764465332, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.040160976350307465, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.284608840942383, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.024372175335884094, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.5174713134765625, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005101846065372229, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.14461356401443481, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0005494621582329273, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.70825958251953, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0021813595667481422, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.52202606201172, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03639062121510506, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.76184844970703, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.028391750529408455, "pnorm/_forward_module.model.norm.weight": 26.050922393798828, "gnorm/_forward_module.model.norm.weight": 0.010517852380871773, "pnorm/_forward_module.lm_head.weight": 182.4493865966797, "gnorm/_forward_module.lm_head.weight": 0.07045239210128784} +{"step": 503316480, "pnorm/_forward_module.model.embeddings.weight": 129.635009765625, "gnorm/_forward_module.model.embeddings.weight": 0.09357499331235886, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.028308868408203, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00429779477417469, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.695470809936523, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010200158692896366, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.62750244140625, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.011931635439395905, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.378880500793457, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13020466268062592, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.329827308654785, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13408531248569489, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9567428827285767, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008078246377408504, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.21186669170856476, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0008454202325083315, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.522031784057617, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002170364372432232, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.194091796875, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0777055025100708, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.20043182373047, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07885469496250153, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.572345733642578, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.003222769359126687, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.658615112304688, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.014787564054131508, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.582852363586426, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.023053305223584175, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.64881706237793, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09353551268577576, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.708791732788086, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08385801315307617, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1759722232818604, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.022075429558753967, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12166144698858261, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0023700303863734007, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.447824478149414, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013429097598418593, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.09895896911621, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05071127042174339, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.169015884399414, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04037129506468773, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.679086685180664, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002046923851594329, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.036238670349121, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011288803070783615, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.830131530761719, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.017390495166182518, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.823448181152344, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07190453261137009, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.940119743347168, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0589553564786911, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.5252881050109863, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009941854514181614, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.14974819123744965, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008659526356495917, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.574708938598633, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0011679284507408738, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.542089462280273, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04564827308058739, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.4443302154541, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03652728348970413, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.77825355529785, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002037184080109, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.298134803771973, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009877245873212814, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.046442985534668, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.012644934467971325, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.978778839111328, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06604481488466263, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.17583179473877, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.05102993920445442, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.4474117755889893, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00910518690943718, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.13018135726451874, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008306254749186337, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.672197341918945, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014678023289889097, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.019630432128906, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04786617308855057, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.711528778076172, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.035746607929468155, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.394445419311523, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.004217156674712896, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.88501262664795, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.019829733297228813, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.401474952697754, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04734686762094498, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.357048034667969, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07696544378995895, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.800097465515137, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04197488725185394, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.4650626182556152, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010916869156062603, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.13449563086032867, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.001422666129656136, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 22.98513412475586, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013103536330163479, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.821144104003906, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04095424711704254, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.0872745513916, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.034986190497875214, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.836925506591797, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002148519968613982, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.496907234191895, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.012808754108846188, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.245428085327148, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.019492298364639282, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.020607948303223, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.050648823380470276, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.327685356140137, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.031762003898620605, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.5613644123077393, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.010421128012239933, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1488582193851471, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001345705590210855, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.73277473449707, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0019287464674562216, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.663793563842773, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04469606652855873, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.84614372253418, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03542183339595795, "pnorm/_forward_module.model.norm.weight": 26.222204208374023, "gnorm/_forward_module.model.norm.weight": 0.008269052021205425, "pnorm/_forward_module.lm_head.weight": 184.9420166015625, "gnorm/_forward_module.lm_head.weight": 0.09206656366586685} +{"step": 524288000, "pnorm/_forward_module.model.embeddings.weight": 130.3246612548828, "gnorm/_forward_module.model.embeddings.weight": 0.08718787878751755, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.019155502319336, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0035892766900360584, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.765745162963867, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.01043260470032692, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.687149047851562, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012564009055495262, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.361692428588867, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11395692825317383, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.314373970031738, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12380198389291763, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9658564329147339, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009793245233595371, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.22093811631202698, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010595459025353193, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.49486541748047, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001968561904504895, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.185304641723633, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06936425715684891, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.200292587280273, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06982214003801346, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.554048538208008, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0022324086166918278, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.663812637329102, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010417691431939602, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.586557388305664, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013971212320029736, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.636076927185059, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0808873102068901, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.698802947998047, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07886005192995071, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.176032543182373, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010697307996451855, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1237124502658844, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006265264819376171, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.4364070892334, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014104443835094571, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.12786865234375, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04911782965064049, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.189735412597656, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.039300717413425446, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67761993408203, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0021521225571632385, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.079867362976074, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01214568316936493, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.859943389892578, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018303576856851578, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.822196960449219, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07044097036123276, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.94301700592041, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05430533364415169, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.567878007888794, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00873540248721838, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.15417517721652985, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006440966972149909, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.56618881225586, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001291750930249691, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.586475372314453, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04710237681865692, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.47489356994629, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03540424257516861, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.789806365966797, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0019171671010553837, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.373250007629395, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009871567599475384, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.104911804199219, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013470706529915333, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 10.993290901184082, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06493211537599564, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.196977615356445, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.048449914902448654, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.4741241931915283, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009654193185269833, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.13339169323444366, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005450038006529212, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.652278900146484, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0025097932666540146, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.05372428894043, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.057725075632333755, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.732797622680664, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03605244681239128, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.468564987182617, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005106343887746334, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 12.99743366241455, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.022382739931344986, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.472896575927734, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.061500176787376404, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.418662071228027, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07594190537929535, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 11.910921096801758, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03892766684293747, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.486382246017456, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014046944677829742, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.13636769354343414, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002507086144760251, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.01252555847168, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013176919892430305, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 28.937028884887695, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03875409811735153, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.15878677368164, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03288609907031059, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.860864639282227, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002323599997907877, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.581269264221191, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.01610160805284977, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.31103801727295, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.025142844766378403, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.043536186218262, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06123049929738045, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.369857788085938, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03061932511627674, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.604661703109741, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.011684687808156013, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1532377004623413, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0015633400762453675, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.757295608520508, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0035232477821409702, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.801347732543945, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.06292885541915894, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 19.928030014038086, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04750709608197212, "pnorm/_forward_module.model.norm.weight": 26.392784118652344, "gnorm/_forward_module.model.norm.weight": 0.006388008128851652, "pnorm/_forward_module.lm_head.weight": 187.33482360839844, "gnorm/_forward_module.lm_head.weight": 0.0961197018623352} +{"step": 545259520, "pnorm/_forward_module.model.embeddings.weight": 130.9851531982422, "gnorm/_forward_module.model.embeddings.weight": 0.09866008162498474, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.010822296142578, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004089560825377703, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.840600967407227, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010384276509284973, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.751606941223145, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012585465796291828, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.344374656677246, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13228513300418854, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.298503875732422, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1396215409040451, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9735642671585083, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010566944256424904, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.22870320081710815, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005894952337257564, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.469507217407227, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023533094208687544, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.17711067199707, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08006951212882996, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.2001953125, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07893776148557663, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.537981033325195, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0027331802994012833, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.671140670776367, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.011806734837591648, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.591340065002441, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.016273802146315575, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.624966621398926, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09463205933570862, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.689739227294922, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09234125167131424, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.177952289581299, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.017008071765303612, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12554606795310974, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018205269007012248, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.424983978271484, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0016119088977575302, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.153779983520508, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05514190346002579, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.208681106567383, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04312380030751228, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.677536010742188, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002266249153763056, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.125182151794434, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011184202507138252, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.889050483703613, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015512706711888313, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.821208000183105, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.08082005381584167, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.945377349853516, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06610430777072906, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.603516101837158, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.011960332281887531, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.15766242146492004, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0011429593432694674, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.559635162353516, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016767495544627309, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.629194259643555, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05404861271381378, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.503725051879883, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0419466570019722, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.801664352416992, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0025711010675877333, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.451581954956055, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.013163045980036259, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.16599178314209, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.017794452607631683, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.005953788757324, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.08220379799604416, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.214759826660156, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.06008578836917877, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.49410080909729, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.014011350460350513, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.13564267754554749, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0017591594951227307, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.629804611206055, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0020516999065876007, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.078433990478516, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05817018076777458, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.746984481811523, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04044386371970177, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.52790641784668, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.004822211340069771, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.090774536132812, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.021762659773230553, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.534430503845215, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05578288808465004, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.473750114440918, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.09728507697582245, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.0155668258667, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04584130272269249, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.502387285232544, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.011761712841689587, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.13796770572662354, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.001554311253130436, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.039270401000977, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0014152801595628262, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.048904418945312, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.044542036950588226, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.22747802734375, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.038972947746515274, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.88224220275879, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0022819829173386097, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.665375709533691, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0186097901314497, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.377079010009766, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.025023356080055237, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.06595516204834, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05892543867230415, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.410065650939941, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03527301549911499, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.639575719833374, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.022267838940024376, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1564987748861313, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0025904770009219646, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.78061294555664, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.002335346769541502, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 28.9324951171875, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.049437109380960464, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.006345748901367, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.039113517850637436, "pnorm/_forward_module.model.norm.weight": 26.560176849365234, "gnorm/_forward_module.model.norm.weight": 0.009667621925473213, "pnorm/_forward_module.lm_head.weight": 189.63722229003906, "gnorm/_forward_module.lm_head.weight": 0.09062054008245468} +{"step": 566231040, "pnorm/_forward_module.model.embeddings.weight": 131.616943359375, "gnorm/_forward_module.model.embeddings.weight": 0.09257137775421143, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 23.002197265625, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004241005051881075, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.91598129272461, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010182862170040607, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.815650939941406, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.011858198791742325, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.325624465942383, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1274988204240799, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.281806945800781, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13117951154708862, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9838422536849976, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008924507535994053, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.23655036091804504, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010059983469545841, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.44268035888672, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0023919311352074146, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.16490936279297, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07394254207611084, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.197500228881836, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07484568655490875, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.5207576751709, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0026270339731127024, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.681082725524902, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.011670769192278385, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.599470138549805, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.01925392635166645, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.610475540161133, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08974747359752655, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.67731761932373, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08121136575937271, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1834654808044434, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013904587365686893, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12758590281009674, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007926201215013862, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.415035247802734, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00142424157820642, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.17845344543457, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05246545001864433, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.22659683227539, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.040093131363391876, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67397689819336, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002328513190150261, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.164811134338379, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012087262235581875, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.915053367614746, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018740398809313774, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.8154296875, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07583335787057877, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.94276237487793, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05822914093732834, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.638157367706299, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.013864818960428238, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.16086547076702118, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007925962563604116, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.55381202697754, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0013794010737910867, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.66927146911621, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04926542565226555, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.53152084350586, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03743276745080948, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.812376022338867, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0019770576618611813, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.52827262878418, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00947288516908884, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.226068496704102, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013163702562451363, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.014494895935059, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06827860325574875, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.228662490844727, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.052021004259586334, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.5103113651275635, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.010600379668176174, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.13697491586208344, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0012660971842706203, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.607025146484375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001842746278271079, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.096996307373047, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05720341578125954, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.75773811340332, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03783155605196953, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.58294105529785, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.00474676163867116, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.173293113708496, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.021747080609202385, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.590808868408203, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.06024261936545372, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.529168128967285, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.09579355269670486, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.124587059020996, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04195033386349678, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.516636371612549, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.012874141335487366, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1391449123620987, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0017544724978506565, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.064897537231445, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001527615706436336, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.154083251953125, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04257133975625038, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.291881561279297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03556307032704353, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.90379524230957, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.00222900346852839, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.739968299865723, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014222950674593449, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.436185836791992, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.0247640460729599, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.08928108215332, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06532065570354462, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.451973915100098, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03269795700907707, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.6775357723236084, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01236772257834673, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.15983805060386658, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0015891651855781674, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.803518295288086, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0023891828022897243, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.057613372802734, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.050376880913972855, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.079425811767578, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.036837853491306305, "pnorm/_forward_module.model.norm.weight": 26.72469139099121, "gnorm/_forward_module.model.norm.weight": 0.010827641934156418, "pnorm/_forward_module.lm_head.weight": 191.869384765625, "gnorm/_forward_module.lm_head.weight": 0.11172198504209518} +{"step": 587202560, "pnorm/_forward_module.model.embeddings.weight": 132.2198486328125, "gnorm/_forward_module.model.embeddings.weight": 0.08466605842113495, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.993871688842773, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0034304216969758272, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 13.985758781433105, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010022037662565708, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.875059127807617, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01246686838567257, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.307923316955566, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11080293357372284, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.265799522399902, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1179494708776474, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 1.9922033548355103, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008832824416458607, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.24362407624721527, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007083591190166771, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.42000961303711, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0019828847143799067, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.15715980529785, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06831581145524979, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.196374893188477, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06779512017965317, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.504674911499023, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002034249948337674, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.691499710083008, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010650614276528358, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.608168601989746, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.015366698615252972, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.596370697021484, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07792558521032333, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.665386199951172, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07603589445352554, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.1879355907440186, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013391397893428802, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.12948215007781982, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008732040878385305, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.40667152404785, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001271469402126968, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.202617645263672, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04833031818270683, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.243663787841797, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03774266690015793, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.670780181884766, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017527805175632238, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.200374603271484, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00908010546118021, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.939257621765137, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.012638548389077187, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.811546325683594, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06713636219501495, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.941020965576172, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05391576141119003, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.6614010334014893, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008641388267278671, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.16312715411186218, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007683138246648014, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.548702239990234, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001451854594051838, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.70584487915039, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04754660651087761, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.557207107543945, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03594350442290306, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.825590133666992, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001962720649316907, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.608642578125, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008342716842889786, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.290182113647461, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.012195616029202938, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.02466106414795, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06873508542776108, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.24301528930664, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04984309896826744, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.5239193439483643, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008120325393974781, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1382778286933899, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005642768810503185, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.5872802734375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.003617769805714488, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.1104679107666, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06855738908052444, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.76482582092285, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04044617712497711, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.621042251586914, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.006745758466422558, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.237469673156738, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.03647832199931145, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.639154434204102, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.0770118311047554, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.578289985656738, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.08561540395021439, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.224433898925781, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.037459395825862885, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5290982723236084, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.011814338155090809, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14033697545528412, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0019129421561956406, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.086326599121094, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013586321147158742, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.24787712097168, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.040182214230298996, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.348831176757812, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03227870166301727, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.925457000732422, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001972934463992715, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.814481735229492, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.012482201680541039, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.493943214416504, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.018223850056529045, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.112107276916504, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06287754327058792, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.493610382080078, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.029877040535211563, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.7098093032836914, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.008406882174313068, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.16264715790748596, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0010658090468496084, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.826942443847656, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0030368538573384285, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.175853729248047, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.054855670779943466, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.149642944335938, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04392606392502785, "pnorm/_forward_module.model.norm.weight": 26.88644790649414, "gnorm/_forward_module.model.norm.weight": 0.006414105650037527, "pnorm/_forward_module.lm_head.weight": 194.02978515625, "gnorm/_forward_module.lm_head.weight": 0.07362798601388931} +{"step": 608174080, "pnorm/_forward_module.model.embeddings.weight": 132.7937774658203, "gnorm/_forward_module.model.embeddings.weight": 0.09324617683887482, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.982986450195312, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003494745586067438, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.049301147460938, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007852527312934399, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.929021835327148, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009071563370525837, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.287379264831543, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11840179562568665, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.24687385559082, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1226731613278389, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0000369548797607, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006339225452393293, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.25010600686073303, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00037607696140185, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.39397430419922, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0019934631418436766, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.14256477355957, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07063397765159607, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.19086456298828, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08344034850597382, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.49014663696289, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0026774427387863398, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.703042030334473, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010726544074714184, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.617230415344238, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.01696922816336155, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.58362865447998, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08981208503246307, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.654370307922363, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08598429709672928, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.18705415725708, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01251278817653656, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13053485751152039, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0011396242771297693, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.395992279052734, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001521183643490076, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.220935821533203, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05373520031571388, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.25668716430664, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04495440050959587, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.667098999023438, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0020174921955913305, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.231640815734863, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009911121800541878, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.960105895996094, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.014930813573300838, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.807196617126465, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07760138809680939, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.938215255737305, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06159059330821037, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.6775355339050293, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.011447370983660221, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.16438478231430054, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009011897491291165, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.5418643951416, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001857844996266067, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.73666763305664, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05617256462574005, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.580018997192383, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04491807147860527, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.83747100830078, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002078188117593527, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.682576179504395, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.01001430582255125, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.348503112792969, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01324539352208376, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.033214569091797, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0779595673084259, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.255447387695312, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.058040834963321686, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.5319933891296387, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.01112950686365366, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1387225240468979, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008082672138698399, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.565059661865234, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005388081539422274, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.11310577392578, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.09838428348302841, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.764129638671875, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.058750614523887634, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.663280487060547, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.010053387843072414, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.305691719055176, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.08825492113828659, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.690781593322754, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.13109861314296722, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.628827095031738, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11418887227773666, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.331921577453613, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.035602446645498276, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5377237796783447, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.018618421629071236, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14095674455165863, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0034696385264396667, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.105987548828125, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0012278016656637192, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.334823608398438, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.036237411201000214, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.401865005493164, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03090221807360649, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.94660186767578, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0018456674879416823, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.884806632995605, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.013428877107799053, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.547882080078125, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.018325606361031532, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.13548755645752, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.051216933876276016, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.53606128692627, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.026097623631358147, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.7310571670532227, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015668107196688652, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.16479219496250153, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0019545224495232105, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.850759506225586, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0020487052388489246, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.290739059448242, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03926685079932213, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.217151641845703, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.031516872346401215, "pnorm/_forward_module.model.norm.weight": 27.044815063476562, "gnorm/_forward_module.model.norm.weight": 0.008031395263969898, "pnorm/_forward_module.lm_head.weight": 196.09226989746094, "gnorm/_forward_module.lm_head.weight": 0.08150552213191986} +{"step": 629145600, "pnorm/_forward_module.model.embeddings.weight": 133.3385772705078, "gnorm/_forward_module.model.embeddings.weight": 0.10836733877658844, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.974563598632812, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004690782632678747, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.113333702087402, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.012650684453547001, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 13.983172416687012, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015555073507130146, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.269970893859863, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1544216424226761, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.231014251708984, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.1499316543340683, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0074679851531982, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.01250538881868124, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.25581094622612, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009166421950794756, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.37277603149414, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002318240934982896, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.13494110107422, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.08529126644134521, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.189340591430664, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.09264523535966873, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.47748565673828, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0032654614187777042, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.719788551330566, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.012197526171803474, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.631291389465332, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.022149186581373215, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.571518898010254, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.11056976765394211, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.643708229064941, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09578881412744522, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.193901777267456, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.015614592470228672, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13215608894824982, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009852549992501736, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.38869857788086, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0016956684412434697, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.24299430847168, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.06243203580379486, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.272274017333984, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0499282144010067, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66256332397461, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00281900935806334, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.258501052856445, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012032398022711277, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.978117942810059, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02391112595796585, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.802173614501953, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.09776932001113892, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.934769630432129, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06709928810596466, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.6955175399780273, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010648575611412525, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.16610410809516907, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009204015950672328, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.537900924682617, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0019029694376513362, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.769189834594727, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05620720237493515, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.60368537902832, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04538873955607414, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.84868621826172, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0024154481943696737, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.756836891174316, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009764833375811577, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.407959938049316, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.0147289102897048, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.039427757263184, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.08897218853235245, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.265048027038574, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.0627334713935852, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.543116807937622, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.011759890243411064, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.13941286504268646, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0012215664610266685, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.54686737060547, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004831778351217508, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.115848541259766, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08649338036775589, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.763137817382812, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.05610034614801407, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.695987701416016, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.009589050896465778, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.366514205932617, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.07373480498790741, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.738666534423828, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.1178293228149414, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.673970222473145, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1159856840968132, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.429946899414062, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04170767590403557, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.546757698059082, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.02106037549674511, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14174695312976837, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.003995003644376993, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.124046325683594, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001438274746760726, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.416336059570312, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04187346249818802, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.452037811279297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03597191348671913, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.966150283813477, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002060454338788986, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 12.95345401763916, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.011093342676758766, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.601015090942383, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01517839077860117, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.157509803771973, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06098154932260513, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.576085090637207, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03079625405371189, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.7534050941467285, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013347704894840717, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.16677144169807434, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0017338368343189359, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.873069763183594, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0016621540999040008, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.397188186645508, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03955509141087532, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.280237197875977, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.032456040382385254, "pnorm/_forward_module.model.norm.weight": 27.198617935180664, "gnorm/_forward_module.model.norm.weight": 0.00675298972055316, "pnorm/_forward_module.lm_head.weight": 198.05392456054688, "gnorm/_forward_module.lm_head.weight": 0.07027759402990341} +{"step": 650117120, "pnorm/_forward_module.model.embeddings.weight": 133.85455322265625, "gnorm/_forward_module.model.embeddings.weight": 0.09970211982727051, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.96574592590332, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004195492714643478, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.170971870422363, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011438763700425625, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.031952857971191, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.015019737184047699, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.252745628356934, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1420404613018036, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.215036392211914, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14201407134532928, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.013953924179077, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011194159276783466, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.2606754004955292, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00040203757816925645, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.352523803710938, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002484887605533004, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.127490997314453, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0817767009139061, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.187217712402344, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08855811506509781, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.46613883972168, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002913471544161439, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.737299919128418, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.011170684359967709, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.644852638244629, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.01703702285885811, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.560013771057129, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09987596422433853, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.63351821899414, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08720856159925461, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2025234699249268, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012439129874110222, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13377317786216736, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010997412027791142, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.38090705871582, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0018190500559285283, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.261865615844727, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.058157749474048615, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.28566551208496, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.046226900070905685, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.660293579101562, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0022633341141045094, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.287147521972656, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011948765255510807, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 11.997123718261719, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01982947252690792, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.798735618591309, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.08062399178743362, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.933127403259277, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06034021079540253, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.715135097503662, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010950464755296707, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.16797882318496704, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0013053604634478688, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53336524963379, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0018183885840699077, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.79769515991211, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05581940710544586, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.624948501586914, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04442049190402031, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.860965728759766, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002422739053145051, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.827947616577148, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.010530954226851463, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.464631080627441, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.015794388949871063, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.047651290893555, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.08449961990118027, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.276342391967773, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.058088939636945724, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.554626226425171, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.01034450065344572, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14027006924152374, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005988876800984144, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.53005599975586, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004483302589505911, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.115646362304688, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08249295502901077, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.761119842529297, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.0570177398622036, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.72606658935547, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.009209664538502693, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.422709465026855, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06558062881231308, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.783963203430176, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.10313582420349121, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.713570594787598, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.13215680420398712, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.521140098571777, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.044025763869285583, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5541129112243652, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.020528966560959816, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14235559105873108, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.004015712533146143, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.139238357543945, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0014953905483707786, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.489532470703125, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04719872772693634, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.497446060180664, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.038936685770750046, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 22.986154556274414, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002269535791128874, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.016969680786133, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.01553372759371996, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.649957656860352, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02458859421312809, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.180953025817871, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06933137029409409, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.61815357208252, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03522561863064766, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.7769992351531982, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01943039707839489, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1689138412475586, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0027445531450212, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.89436912536621, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0025103711523115635, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.496397018432617, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05317879468202591, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.33913230895996, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04049231484532356, "pnorm/_forward_module.model.norm.weight": 27.34844398498535, "gnorm/_forward_module.model.norm.weight": 0.006223059259355068, "pnorm/_forward_module.lm_head.weight": 199.92138671875, "gnorm/_forward_module.lm_head.weight": 0.10315292328596115} +{"step": 671088640, "pnorm/_forward_module.model.embeddings.weight": 134.34368896484375, "gnorm/_forward_module.model.embeddings.weight": 0.0846937745809555, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.960023880004883, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003437833394855261, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.230409622192383, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009906584396958351, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.082666397094727, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012052349746227264, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.23956298828125, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12025292217731476, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.203167915344238, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12798042595386505, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0208969116210938, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.010388758964836597, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.26542940735816956, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0019085647072643042, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.33293914794922, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0020618666894733906, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.120115280151367, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07124169915914536, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.184185028076172, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08709387481212616, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.455699920654297, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002756789093837142, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.753728866577148, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.010076207108795643, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.658032417297363, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.01671478897333145, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.54946231842041, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09131456911563873, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.623910903930664, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.08907715976238251, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2140066623687744, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.012817910872399807, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13589230179786682, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013894832227379084, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.3750057220459, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0016371897654607892, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.281246185302734, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.053360357880592346, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.298925399780273, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.048127397894859314, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.65839958190918, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0021985783241689205, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.317134857177734, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010844511911273003, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.017585754394531, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01712421327829361, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.79453182220459, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07893633842468262, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.929996490478516, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.06411401927471161, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.737887382507324, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010321340523660183, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17011654376983643, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008286783122457564, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53103256225586, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0021373501513153315, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.826196670532227, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05719630792737007, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.645950317382812, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0493239089846611, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.872238159179688, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0022966659162193537, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.897600173950195, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.009323777630925179, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.520064353942871, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01317282672971487, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.053683280944824, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.07927346974611282, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.285006523132324, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.06342759728431702, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.5672054290771484, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009879220277071, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14156347513198853, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.001128380885347724, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.51361846923828, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.010579629801213741, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.111576080322266, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.1473333090543747, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.756555557250977, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08773857355117798, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.754615783691406, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.01797524280846119, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.480687141418457, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.15974865853786469, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.830183982849121, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.23904019594192505, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.749584197998047, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10495985299348831, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.607535362243652, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03531548008322716, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.559389591217041, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.029821598902344704, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1428786963224411, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0058329044841229916, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.151151657104492, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001184387831017375, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.55353355407715, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03511161729693413, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.53696632385254, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.029869263991713524, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.005184173583984, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001977429259568453, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.077054977416992, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.013847687281668186, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.695968627929688, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02086167223751545, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.203461647033691, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04887531325221062, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.658379554748535, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.025423688814044, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.797100305557251, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01776924915611744, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.17091162502765656, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0019246295560151339, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.910486221313477, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.00213628844358027, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.579984664916992, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04139848053455353, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.388519287109375, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03337901458144188, "pnorm/_forward_module.model.norm.weight": 27.492111206054688, "gnorm/_forward_module.model.norm.weight": 0.007569338660687208, "pnorm/_forward_module.lm_head.weight": 201.6817626953125, "gnorm/_forward_module.lm_head.weight": 0.0697295293211937} +{"step": 692060160, "pnorm/_forward_module.model.embeddings.weight": 134.8092803955078, "gnorm/_forward_module.model.embeddings.weight": 0.08929193764925003, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.95271873474121, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003702018177136779, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.287496566772461, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009377451613545418, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.130566596984863, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01139868050813675, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.22375202178955, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12306412309408188, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.188481330871582, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12143574655056, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0259532928466797, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008768623694777489, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.268900066614151, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006963612977415323, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.314979553222656, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001863899640738964, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.113679885864258, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06723637878894806, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.1821346282959, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07045397907495499, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.444143295288086, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0023465261328965425, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.767558097839355, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009487834759056568, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.6689453125, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013917691074311733, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.537044525146484, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08402647823095322, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.61259651184082, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0735391154885292, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2248544692993164, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011156493797898293, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1377658247947693, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00043780551641248167, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.370161056518555, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013803554465994239, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.30032730102539, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.049490608274936676, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.31199836730957, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03818056359887123, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.656328201293945, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0020427382551133633, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.342205047607422, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010822472162544727, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.033893585205078, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016468271613121033, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.790641784667969, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06933659315109253, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.927129745483398, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05107486620545387, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.759277582168579, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.011231140233576298, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17191559076309204, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009110230021178722, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.530750274658203, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0013648406602442265, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.855342864990234, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04589239880442619, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.667016983032227, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03618251904845238, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.883272171020508, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001859284471720457, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 12.962814331054688, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008915445767343044, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.572606086730957, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01283302716910839, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.058589935302734, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06489621102809906, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.293240547180176, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04664922133088112, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.580382823944092, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009206647984683514, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14251630008220673, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0010339347645640373, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.502357482910156, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002086567459627986, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.111757278442383, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05230613052845001, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.755197525024414, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.042504873126745224, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.773799896240234, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005116415675729513, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.52672004699707, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.03278525546193123, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.868165969848633, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05273598060011864, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.782397270202637, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10807187110185623, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.684576034545898, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03691733628511429, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5662596225738525, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.016490206122398376, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1434997171163559, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002536082174628973, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.165729522705078, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0012231811415404081, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.617046356201172, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03892673924565315, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.576860427856445, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03922191634774208, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.024139404296875, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0030940501019358635, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.130508422851562, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.03363807871937752, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.736977577209473, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.04354558140039444, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.227702140808105, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0637601986527443, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.701126098632812, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03448743745684624, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.818361759185791, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.047110579907894135, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.17286644876003265, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.006085975095629692, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.92862892150879, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.002984287915751338, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.663928985595703, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05215289071202278, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.439010620117188, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03861791267991066, "pnorm/_forward_module.model.norm.weight": 27.63249969482422, "gnorm/_forward_module.model.norm.weight": 0.006089869886636734, "pnorm/_forward_module.lm_head.weight": 203.3670654296875, "gnorm/_forward_module.lm_head.weight": 0.0928235799074173} +{"step": 713031680, "pnorm/_forward_module.model.embeddings.weight": 135.25180053710938, "gnorm/_forward_module.model.embeddings.weight": 0.0843285322189331, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.942996978759766, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003430221462622285, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.337125778198242, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010223323479294777, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.172155380249023, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.01344671193510294, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.205398559570312, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11585825681686401, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.171051025390625, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11454811692237854, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0283870697021484, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009221279993653297, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.2718670666217804, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000892713840585202, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.29642677307129, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0018370113102719188, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.10466194152832, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06413992494344711, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.178146362304688, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07044485211372375, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.433055877685547, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002148950705304742, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.78374195098877, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009820623323321342, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.680913925170898, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.014268044382333755, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.524354934692383, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07715494930744171, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.60086441040039, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07190807163715363, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.230830430984497, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01210275199264288, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.13904543220996857, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009636766626499593, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.365514755249023, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012127895606681705, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.317771911621094, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.047282226383686066, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.323959350585938, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03823622688651085, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.654603958129883, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0019197195069864392, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.36749267578125, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00931661669164896, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.050914764404297, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.014471595175564289, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.786714553833008, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06824307888746262, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.924304962158203, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.052845802158117294, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.7750260829925537, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008208861574530602, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1728971004486084, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008198431460186839, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53038787841797, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014916908694431186, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.882265090942383, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04683845117688179, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.6868896484375, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03774050995707512, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.895828247070312, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0018206179374828935, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.028973579406738, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008112844079732895, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.62612247467041, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011301987804472446, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.064562797546387, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06587550044059753, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.30220890045166, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.05009397864341736, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.5926127433776855, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006535707972943783, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1433146893978119, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0004406883963383734, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.488460540771484, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0033001156989485025, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.102720260620117, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06384614109992981, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.748334884643555, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.0492081455886364, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.79669761657715, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0071243117563426495, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.573709487915039, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.052821096032857895, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.906181335449219, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.08689448237419128, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.815587043762207, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12187599390745163, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.764604568481445, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03672080114483833, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5706210136413574, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014068582095205784, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14384454488754272, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0021231865976005793, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.177291870117188, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013400536263361573, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.672954559326172, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04063091427087784, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.612407684326172, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03430015221238136, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.043554306030273, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.002261859131976962, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.182646751403809, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.017538679763674736, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.77649211883545, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02552494965493679, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.252890586853027, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06143781170248985, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.745694160461426, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.030048469081521034, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.842107057571411, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.023395542055368423, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.17508681118488312, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0030105591285973787, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.945880889892578, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0025402538012713194, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.74236297607422, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.049291957169771194, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.487258911132812, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03721962124109268, "pnorm/_forward_module.model.norm.weight": 27.771671295166016, "gnorm/_forward_module.model.norm.weight": 0.007488827686756849, "pnorm/_forward_module.lm_head.weight": 204.98007202148438, "gnorm/_forward_module.lm_head.weight": 0.08621737360954285} +{"step": 734003200, "pnorm/_forward_module.model.embeddings.weight": 135.67218017578125, "gnorm/_forward_module.model.embeddings.weight": 0.09797845035791397, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.935222625732422, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0042325761169195175, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.385210990905762, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010008271783590317, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.21323013305664, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012262441217899323, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.189654350280762, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.13689392805099487, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.15589427947998, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.13134519755840302, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.030973196029663, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009610936976969242, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.27541565895080566, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000648517336230725, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.279727935791016, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002154143527150154, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.098203659057617, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07807356864213943, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.1755313873291, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0863145962357521, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.423696517944336, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0029226297046989202, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.800469398498535, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.011845839209854603, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.694205284118652, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.019389821216464043, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.512797355651855, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09587068110704422, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.58995246887207, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0819927304983139, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2381293773651123, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01637057587504387, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14024761319160461, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0014985011657699943, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.36261558532715, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015060951700434089, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.337013244628906, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05649150162935257, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.337108612060547, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.045231644064188004, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.65395736694336, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002409205539152026, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.393370628356934, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01227942667901516, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.068059921264648, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02117595262825489, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.783492088317871, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0810232013463974, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.922270774841309, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.056500144302845, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.7944509983062744, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.01150998380035162, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17465318739414215, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007179492968134582, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.529285430908203, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001459812861867249, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.90689468383789, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05392614006996155, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.70524024963379, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04434484988451004, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.90747833251953, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0021234890446066856, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.091876029968262, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008881035260856152, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.675975799560547, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01366397924721241, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.070077896118164, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.07936809211969376, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.310572624206543, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.054264724254608154, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.602910280227661, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008299482986330986, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14377465844154358, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006166360690258443, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.474451065063477, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0024151629768311977, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.09170913696289, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06485194712877274, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.740224838256836, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06089286878705025, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.817415237426758, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007204937282949686, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.623347282409668, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.04574649780988693, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.946109771728516, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.07227539271116257, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.845100402832031, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.15660282969474792, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.836613655090332, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.0376940593123436, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.574814558029175, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.009147176519036293, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14430370926856995, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0010895373998209834, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.189128875732422, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013427375815808773, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.727130889892578, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04020010307431221, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.646780014038086, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03395417705178261, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.062808990478516, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0020382797811180353, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.233829498291016, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.011584911495447159, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.81599235534668, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.016036294400691986, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.277209281921387, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05971907824277878, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.789109230041504, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02824847400188446, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.8578908443450928, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013760969042778015, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1764129102230072, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0018451636424288154, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.96306800842285, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0024017433170229197, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.816665649414062, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04546718671917915, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.533390045166016, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03587469086050987, "pnorm/_forward_module.model.norm.weight": 27.909011840820312, "gnorm/_forward_module.model.norm.weight": 0.0077365986071527, "pnorm/_forward_module.lm_head.weight": 206.50244140625, "gnorm/_forward_module.lm_head.weight": 0.0653042197227478} +{"step": 754974720, "pnorm/_forward_module.model.embeddings.weight": 136.07118225097656, "gnorm/_forward_module.model.embeddings.weight": 0.09665370732545853, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.928749084472656, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.004038245417177677, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.43343734741211, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011147202923893929, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.254497528076172, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.013266954571008682, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.17577838897705, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.14149537682533264, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.142879486083984, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.14144586026668549, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.033261299133301, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009792673401534557, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.27849239110946655, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00156656454782933, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.264204025268555, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.002007000846788287, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.093563079833984, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0789017304778099, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.1737117767334, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08237800002098083, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.415058135986328, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0029452999588102102, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.818763732910156, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.012610075995326042, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.708589553833008, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.018520155921578407, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.502131462097168, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.1029830053448677, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.579854965209961, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.09005337953567505, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2487385272979736, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.021148694679141045, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14180296659469604, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.003473537042737007, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.361530303955078, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0018854454392567277, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.35826301574707, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.059692032635211945, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.35123062133789, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.046181656420230865, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.653247833251953, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.003549326444044709, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.419600486755371, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01933566853404045, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.085742950439453, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.03172016888856888, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.780291557312012, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.09428482502698898, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.91946792602539, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05947402864694595, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8112356662750244, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.011218794621527195, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17617270350456238, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0011402466334402561, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53093719482422, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016213735798373818, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.934123992919922, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.053675755858421326, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.72575569152832, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0413532480597496, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.92035484313965, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.002221454866230488, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.158726692199707, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.010351826436817646, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.730569839477539, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013927721418440342, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.076579093933105, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.07810936868190765, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.319425582885742, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.054055262356996536, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.6128735542297363, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.011155880987644196, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1445290595293045, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.001297173323109746, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.46291160583496, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005117642227560282, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.086503982543945, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08388591557741165, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.737369537353516, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.055967479944229126, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.835233688354492, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.009401131421327591, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.668595314025879, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06999868899583817, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 12.983047485351562, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.11377683281898499, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.87049388885498, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11390859633684158, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.899470329284668, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.04136918485164642, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.581475257873535, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.016778109595179558, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14490912854671478, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0029635983519256115, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.20096778869629, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001547745312564075, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.779525756835938, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04443550109863281, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.67987632751465, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.036157261580228806, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.081758499145508, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0023551390040665865, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.28552532196045, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014331337064504623, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.854262351989746, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.023810744285583496, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.30121898651123, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.06704218685626984, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.832493782043457, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.030763130635023117, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.872176170349121, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015274704433977604, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.17765051126480103, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0018217448377981782, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.97845458984375, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.00269183237105608, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.88664436340332, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05265451967716217, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.57429313659668, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04224680736660957, "pnorm/_forward_module.model.norm.weight": 28.04055404663086, "gnorm/_forward_module.model.norm.weight": 0.005349005572497845, "pnorm/_forward_module.lm_head.weight": 207.9368896484375, "gnorm/_forward_module.lm_head.weight": 0.08904996514320374} +{"step": 775946240, "pnorm/_forward_module.model.embeddings.weight": 136.4510498046875, "gnorm/_forward_module.model.embeddings.weight": 0.06702663749456406, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.924442291259766, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002772872569039464, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.48292350769043, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007852975279092789, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.29662799835205, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009227829985320568, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.16481876373291, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09252835065126419, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.132954597473145, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09666883200407028, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.037780523300171, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006196698173880577, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.28171810507774353, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0008407388813793659, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.25069236755371, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00144023762550205, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.0922908782959, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05162239819765091, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.17380714416504, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05353643372654915, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.406391143798828, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001691583194769919, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.838111877441406, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00833314098417759, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.723450660705566, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011154571548104286, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.491730690002441, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06417413800954819, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.569997787475586, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.061382997781038284, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.256359100341797, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.01023099198937416, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1432262659072876, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001046627527102828, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.357324600219727, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001106876297853887, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.375213623046875, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03975257650017738, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.36290740966797, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0312957763671875, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.652956008911133, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0015097418799996376, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.446064949035645, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008198934607207775, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.104392051696777, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.0118746692314744, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.77802562713623, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05310474708676338, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.917634963989258, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04326550289988518, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8287906646728516, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007582833990454674, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17783313989639282, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009439038694836199, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.5314998626709, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.00109114870429039, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.959911346435547, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.038537293672561646, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.745296478271484, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.030721524730324745, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.9329891204834, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0015021024737507105, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.22321891784668, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006807069759815931, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.783366203308105, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010234990157186985, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.081950187683105, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05122341960668564, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.32691764831543, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03892672061920166, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.6285645961761475, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0059224264696240425, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.145813450217247, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005452464683912694, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.45343589782715, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001834383117966354, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.082847595214844, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04507686570286751, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.734317779541016, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03906684368848801, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.847808837890625, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.004432880785316229, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.710033416748047, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.034978367388248444, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.018083572387695, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04563557356595993, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.894393920898438, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.09197548776865005, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 12.958243370056152, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02964828349649906, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5882375240325928, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006567356642335653, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14560608565807343, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0004977292264811695, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.21078109741211, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009657903574407101, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.82769775390625, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.031321533024311066, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.711078643798828, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.026971373707056046, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.102161407470703, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0013352916575968266, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.338165283203125, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00841483473777771, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.893717765808105, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.012700546532869339, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.324860572814941, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03870975971221924, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.875473022460938, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02258933149278164, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.8886866569519043, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.010698539204895496, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1792582869529724, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001250137691386044, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 22.991636276245117, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0017109822947531939, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 29.94814682006836, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.035355377942323685, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.612878799438477, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.029855884611606598, "pnorm/_forward_module.model.norm.weight": 28.16875648498535, "gnorm/_forward_module.model.norm.weight": 0.006586446426808834, "pnorm/_forward_module.lm_head.weight": 209.2964630126953, "gnorm/_forward_module.lm_head.weight": 0.05597059428691864} +{"step": 796917760, "pnorm/_forward_module.model.embeddings.weight": 136.81356811523438, "gnorm/_forward_module.model.embeddings.weight": 0.08052036166191101, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.9164981842041, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0034140809439122677, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.528804779052734, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008100314997136593, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.335999488830566, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009168094024062157, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.149456977844238, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.12190574407577515, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.118474960327148, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11563693732023239, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0381813049316406, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00678676925599575, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.2840569019317627, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009607343818061054, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.232818603515625, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0018513007089495659, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.083593368530273, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06649553775787354, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.169845581054688, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07527559250593185, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.39675521850586, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0025563635863363743, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.854055404663086, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009285985492169857, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.735601425170898, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.016212014481425285, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.480488777160645, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08777830004692078, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.5595703125, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06972800195217133, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2647485733032227, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010953822173178196, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1445082426071167, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009063383913598955, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.353683471679688, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013597582001239061, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.39214324951172, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04984012991189957, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.374515533447266, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.040256716310977936, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.65257453918457, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0019008142407983541, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.470624923706055, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010694299824535847, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.121496200561523, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01677878201007843, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.77488899230957, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0706184133887291, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.915802955627441, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04907408356666565, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8470351696014404, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010611104778945446, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.17924396693706512, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009145158692263067, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.530994415283203, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0015618964098393917, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 27.982799530029297, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.047711893916130066, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.76336669921875, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.039370957762002945, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.944488525390625, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0018061203882098198, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.284429550170898, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007812881842255592, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.833401679992676, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01265917532145977, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.08626651763916, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06614292412996292, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.334331512451172, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.047124557197093964, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.641245126724243, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007162606343626976, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14668002724647522, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007564174593426287, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.442354202270508, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.003691149177029729, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.073816299438477, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06233417987823486, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.728336334228516, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06009867042303085, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.864547729492188, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007935965433716774, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.750018119812012, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.049821723252534866, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.050531387329102, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.07849938422441483, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.920889854431152, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.14597684144973755, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.024733543395996, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.035511769354343414, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.5918869972229004, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.012190493755042553, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1458360254764557, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002054681070148945, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.220924377441406, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0011634617112576962, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.874568939208984, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03737368807196617, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.741676330566406, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03141983598470688, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.123369216918945, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001929140416905284, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.387592315673828, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.012281330302357674, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.930367469787598, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02187546156346798, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.350813865661621, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.058840278536081314, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.921873092651367, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.027115095406770706, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.909567356109619, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.012964163906872272, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18104052543640137, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001661529066041112, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.00706672668457, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0027390264440327883, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.0133056640625, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04890192672610283, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.65381622314453, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03935280069708824, "pnorm/_forward_module.model.norm.weight": 28.297029495239258, "gnorm/_forward_module.model.norm.weight": 0.007687462959438562, "pnorm/_forward_module.lm_head.weight": 210.59397888183594, "gnorm/_forward_module.lm_head.weight": 0.08222661912441254} +{"step": 817889280, "pnorm/_forward_module.model.embeddings.weight": 137.15567016601562, "gnorm/_forward_module.model.embeddings.weight": 0.07306475192308426, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.911684036254883, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0029923662077635527, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.572299003601074, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007676415611058474, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.373515129089355, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009735281579196453, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.138505935668945, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09928611665964127, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.10787296295166, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09428305178880692, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0419728755950928, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006082577630877495, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.2869444787502289, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0004130627494305372, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.21906852722168, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001391372294165194, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.08026885986328, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.053593724966049194, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.168737411499023, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.057493679225444794, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.38874053955078, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0019823594484478235, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.871614456176758, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008674097247421741, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.749263763427734, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011728547513484955, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.470274925231934, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06578321754932404, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.54985237121582, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05714655667543411, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.271392583847046, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009599674493074417, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1457970291376114, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010318869026377797, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.3531436920166, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011097453534603119, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.412086486816406, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04084980860352516, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.387971878051758, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03246660158038139, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.652427673339844, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0015836319653317332, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.4940767288208, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009547404013574123, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.137714385986328, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013843519613146782, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.77287483215332, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05532437562942505, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.914285659790039, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.041457608342170715, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8592519760131836, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007080974522978067, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1802183836698532, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005010329186916351, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53291130065918, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001173767144791782, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.00729751586914, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03921736031770706, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.7824764251709, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0325540266931057, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.955636978149414, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001469228183850646, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.342194557189941, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006825422868132591, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.880573272705078, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010694421827793121, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.091304779052734, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.052316002547740936, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.341573715209961, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03867855295538902, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.6573097705841064, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006216119509190321, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1481006145477295, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005404132534749806, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.4321346282959, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002337028505280614, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.065975189208984, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04865556210279465, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.723478317260742, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04619142785668373, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.87859535217285, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005220006685703993, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.78917407989502, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.03685825690627098, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.082464218139648, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.051995955407619476, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.942051887512207, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10951948910951614, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.077922821044922, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02899787947535515, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.596938371658325, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00681950943544507, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1463233381509781, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0006562539492733777, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.227378845214844, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009639169438742101, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.913646697998047, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03146327659487724, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.767486572265625, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02676074579358101, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.143016815185547, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0014917904045432806, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.433774948120117, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.009250866249203682, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.964123725891113, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.013757641427218914, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.37610149383545, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.040997765958309174, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 11.966365814208984, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02159370295703411, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.9272210597991943, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005521716084331274, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18270361423492432, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0008046979201026261, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.02057647705078, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0017612571828067303, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.072376251220703, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.034296926110982895, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.69012451171875, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.029622385278344154, "pnorm/_forward_module.model.norm.weight": 28.418928146362305, "gnorm/_forward_module.model.norm.weight": 0.005179626867175102, "pnorm/_forward_module.lm_head.weight": 211.81265258789062, "gnorm/_forward_module.lm_head.weight": 0.05952082946896553} +{"step": 838860800, "pnorm/_forward_module.model.embeddings.weight": 137.48036193847656, "gnorm/_forward_module.model.embeddings.weight": 0.09313131868839264, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.904102325439453, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0038445978425443172, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.611473083496094, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009492487646639347, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.407427787780762, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012324853800237179, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.124113082885742, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.1301378756761551, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.093955993652344, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.12265733629465103, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0434606075286865, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008068287745118141, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.28935256600379944, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000808750803116709, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.20477867126465, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0019600018858909607, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.0754451751709, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.07178672403097153, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.166736602783203, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08670590817928314, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.38014030456543, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0025244757998734713, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.88676643371582, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009810651652514935, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.760666847229004, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.014785516075789928, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.45954418182373, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.09047947078943253, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.539627075195312, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0755116418004036, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2792694568634033, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.013937395997345448, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1469656080007553, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0018349724123254418, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.351612091064453, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001438191975466907, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.42933464050293, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05550767481327057, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.400068283081055, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.046776220202445984, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.652629852294922, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002020950196310878, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.514494895935059, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010147780179977417, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.151631355285645, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015554082579910755, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.771052360534668, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07916654646396637, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.913415908813477, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05663143843412399, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.874633312225342, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007942156866192818, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18169501423835754, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007079153438098729, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.534521102905273, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014439038932323456, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.030336380004883, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05308162793517113, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.800739288330078, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04778396338224411, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.967052459716797, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0019272251520305872, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.401864051818848, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008206233382225037, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.929579734802246, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.013249550946056843, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.095524787902832, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.07447800040245056, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.349016189575195, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.054812002927064896, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.66736102104187, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007930290885269642, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14861464500427246, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008915287908166647, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.422279357910156, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002310918876901269, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.05714225769043, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06057966500520706, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.718547821044922, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07826043665409088, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.892946243286133, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.008712111972272396, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.828168869018555, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.029701588675379753, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.114505767822266, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04988124221563339, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.964015007019043, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.20141983032226562, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.133099555969238, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.042315687984228134, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6013293266296387, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008064966648817062, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14660102128982544, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0009609289700165391, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.235692977905273, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.001311396132223308, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.953371047973633, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04444218799471855, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.794160842895508, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.037128567695617676, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.163787841796875, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0023145084269344807, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.480284690856934, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014406885951757431, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 12.99943733215332, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.020625775679945946, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.402484893798828, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0665602907538414, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.013307571411133, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.031850580126047134, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.941514492034912, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01993757300078869, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18392597138881683, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.002546690870076418, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.034284591674805, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.001917787827551365, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.129621505737305, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.04730075225234032, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.72633171081543, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03783448040485382, "pnorm/_forward_module.model.norm.weight": 28.54103660583496, "gnorm/_forward_module.model.norm.weight": 0.004493799060583115, "pnorm/_forward_module.lm_head.weight": 212.9751739501953, "gnorm/_forward_module.lm_head.weight": 0.07397575676441193} +{"step": 859832320, "pnorm/_forward_module.model.embeddings.weight": 137.78717041015625, "gnorm/_forward_module.model.embeddings.weight": 0.07790575176477432, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.898059844970703, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0028329272754490376, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.650580406188965, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010270954109728336, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.440414428710938, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012349704280495644, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.111501693725586, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10177499055862427, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.082037925720215, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10354442149400711, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0450613498687744, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.009072750806808472, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.2918230891227722, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0017048836452886462, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.19234848022461, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0018490381771698594, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.07198715209961, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.056418292224407196, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.165573120117188, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06694705784320831, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.373323440551758, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002049371600151062, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.905255317687988, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007867963053286076, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.774893760681152, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011686655692756176, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.449787139892578, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06882378458976746, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.530340194702148, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06434841454029083, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2873880863189697, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009577876888215542, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1481987088918686, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009587508393451571, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35097312927246, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001249333145096898, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.44635009765625, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.045225050300359726, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.412385940551758, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03806111589074135, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.653186798095703, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017880358500406146, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.537456512451172, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01044592447578907, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.16769790649414, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01672854833304882, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.768730163574219, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.058966685086488724, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.912076950073242, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04515902325510979, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8876445293426514, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.012124202214181423, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1828436702489853, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0013642574194818735, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.535856246948242, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016433449927717447, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.0516300201416, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.043468303978443146, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.8176212310791, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03748737648129463, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.97846221923828, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0016480914782732725, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.458708763122559, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006852276623249054, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 12.976369857788086, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009503553621470928, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.099708557128906, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05512210726737976, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.356176376342773, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.042836956679821014, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.6783370971679688, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006694842129945755, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.14942270517349243, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.000580135325435549, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.41396713256836, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005634963978081942, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.0504150390625, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.07492616027593613, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.714599609375, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06694575399160385, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.904212951660156, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.010878123342990875, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.863441467285156, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.08675549924373627, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.143143653869629, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.12792693078517914, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 11.983341217041016, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12309857457876205, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.180700302124023, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03138117119669914, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6056783199310303, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010836937464773655, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14692369103431702, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0019584959372878075, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.242874145507812, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010223044082522392, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 29.989566802978516, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03304382413625717, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.818445205688477, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.027277396991848946, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.183143615722656, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0013851848198100924, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.520170211791992, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.007553393952548504, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.029318809509277, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.012782802805304527, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.428756713867188, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.043019574135541916, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.059635162353516, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.023323651403188705, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.9574320316314697, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.004945255815982819, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18545708060264587, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.000463858712464571, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.04660415649414, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0015706942649558187, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.18092155456543, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.035254400223493576, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.758848190307617, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02908034808933735, "pnorm/_forward_module.model.norm.weight": 28.657909393310547, "gnorm/_forward_module.model.norm.weight": 0.006206231191754341, "pnorm/_forward_module.lm_head.weight": 214.06825256347656, "gnorm/_forward_module.lm_head.weight": 0.06830868124961853} +{"step": 880803840, "pnorm/_forward_module.model.embeddings.weight": 138.0771942138672, "gnorm/_forward_module.model.embeddings.weight": 0.07754462212324142, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.890871047973633, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002969298278912902, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.687477111816406, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.009073873050510883, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.47184944152832, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010606960393488407, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.097187995910645, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10125336796045303, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.068337440490723, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10308533161878586, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0472774505615234, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008020934648811817, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.29391297698020935, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0014815657632425427, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.178890228271484, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0014439505757763982, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.06619644165039, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05621187761425972, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.162906646728516, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06921005249023438, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.36644172668457, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0019526705145835876, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.921900749206543, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008567255921661854, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.78735637664795, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.012029530480504036, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.440206527709961, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.0717366635799408, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.521330833435059, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0635373666882515, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.2920939922332764, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010416864417493343, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14897780120372772, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010864452924579382, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.349994659423828, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0015165774384513497, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.461584091186523, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0470227375626564, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.42327308654785, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04030859097838402, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.653400421142578, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0024126956705003977, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.557628631591797, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.013639801181852818, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.182415962219238, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02259022928774357, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.766500473022461, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06299328058958054, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.91079044342041, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04650496318936348, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.8965961933135986, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.010797441937029362, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18371102213859558, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008548683836124837, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.537261962890625, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001784618361853063, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.071964263916016, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04673202708363533, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.83412742614746, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03996175155043602, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.989818572998047, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017111891647800803, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.512073516845703, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007782041560858488, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.019919395446777, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010800608433783054, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.104558944702148, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06054375693202019, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.363916397094727, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04633874073624611, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.689622640609741, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.007139412686228752, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15011364221572876, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007528822752647102, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.405494689941406, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.006427795626223087, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.041872024536133, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08871942013502121, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.709300994873047, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07311833649873734, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.915834426879883, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011619100347161293, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.899264335632324, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.103671595454216, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.172654151916504, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.15635330975055695, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.003082275390625, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1267338991165161, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.229049682617188, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.033501673489809036, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.612168073654175, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.017327819019556046, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14747312664985657, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0032758451998233795, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.250064849853516, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010450172703713179, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.02389907836914, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.034848641604185104, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.84201431274414, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.029173744842410088, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.204444885253906, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001582830329425633, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.562796592712402, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00954193715006113, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.06179428100586, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.015483209863305092, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.455950736999512, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.048251863569021225, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.106938362121582, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.025236256420612335, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.9697415828704834, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.008209239691495895, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18652121722698212, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0011435297783464193, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.058616638183594, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0017791048157960176, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.229463577270508, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.040095873177051544, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.789758682250977, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.033052071928977966, "pnorm/_forward_module.model.norm.weight": 28.77206802368164, "gnorm/_forward_module.model.norm.weight": 0.006384439300745726, "pnorm/_forward_module.lm_head.weight": 215.10980224609375, "gnorm/_forward_module.lm_head.weight": 0.06594003736972809} +{"step": 901775360, "pnorm/_forward_module.model.embeddings.weight": 138.3507537841797, "gnorm/_forward_module.model.embeddings.weight": 0.07962066680192947, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.884685516357422, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003393898718059063, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.722733497619629, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008997690863907337, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.501851081848145, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010751748457551003, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.084761619567871, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11401937156915665, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.056323051452637, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10906094312667847, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.050029993057251, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.007609283551573753, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.29607462882995605, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005226320354267955, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.16652488708496, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0016849333187565207, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.061954498291016, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06175040453672409, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.16067886352539, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07130613178014755, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.35801124572754, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.002139550633728504, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.935014724731445, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00838969275355339, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.797394752502441, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.014542078599333763, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.429153442382812, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07660187035799026, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.510754585266113, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06591671705245972, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.298287868499756, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010088038630783558, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.14998693764209747, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006047643255442381, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.349336624145508, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010848947567865252, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.47783851623535, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04561617597937584, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.43431854248047, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.039701882749795914, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.652725219726562, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001762392115779221, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.577519416809082, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009919407777488232, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.196560859680176, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015906114131212234, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.762578964233398, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06347990781068802, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.90758991241455, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0470866784453392, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9039008617401123, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008879845961928368, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1842811405658722, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000620411999989301, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.53880500793457, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0013260712148621678, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.091814041137695, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04767102748155594, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.850297927856445, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.039937350898981094, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 22.998720169067383, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0018637333996593952, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.563468933105469, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00731686782091856, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.061847686767578, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01148783229291439, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.10639762878418, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06562943011522293, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.368574142456055, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04693746939301491, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.696267604827881, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006115013733506203, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1503981202840805, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007711807847954333, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.39792823791504, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0022575175389647484, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.034936904907227, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0568813756108284, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.705678939819336, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06595637649297714, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.926895141601562, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0070278942584991455, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.933919906616211, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.035525571554899216, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.20022201538086, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05504473298788071, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.021612167358398, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1563316285610199, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.27539348602295, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.0368221178650856, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6145365238189697, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007754233665764332, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14768607914447784, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0008954785880632699, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.25664710998535, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.00115203857421875, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.055824279785156, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.038664184510707855, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.864500045776367, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03388933837413788, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.225255966186523, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0019938009791076183, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.604026794433594, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014227744191884995, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.091958045959473, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.022390512749552727, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.482406616210938, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.05683751031756401, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.152392387390137, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.029256543144583702, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.9851722717285156, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.011692659929394722, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18791569769382477, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0015655255410820246, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.071069717407227, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.003198722843080759, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.278606414794922, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05507264658808708, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.820466995239258, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.043784767389297485, "pnorm/_forward_module.model.norm.weight": 28.88334846496582, "gnorm/_forward_module.model.norm.weight": 0.00421614246442914, "pnorm/_forward_module.lm_head.weight": 216.08831787109375, "gnorm/_forward_module.lm_head.weight": 0.08744513243436813} +{"step": 922746880, "pnorm/_forward_module.model.embeddings.weight": 138.60733032226562, "gnorm/_forward_module.model.embeddings.weight": 0.08322753757238388, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.879640579223633, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003111126134172082, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.760520935058594, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.011305867694318295, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.534171104431152, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.014675194397568703, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.073094367980957, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11106320470571518, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.045234680175781, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11165151000022888, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0524580478668213, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.011733824387192726, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.29827117919921875, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001755175762809813, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.1544132232666, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001771178562194109, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.058053970336914, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06274783611297607, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.158641815185547, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08033251762390137, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.350727081298828, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0021980642341077328, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.949515342712402, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008747179061174393, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.80878734588623, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013611843809485435, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.41847038269043, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08018230646848679, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.500429153442383, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.07108448445796967, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3065409660339355, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010188620537519455, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15118350088596344, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000924665539059788, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.348684310913086, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001344715477898717, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.492963790893555, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04858439788222313, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.444839477539062, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.045708067715168, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.652889251708984, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0019817634020000696, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.597678184509277, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01216124091297388, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.210389137268066, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01919635944068432, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.759647369384766, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06971848011016846, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.905094146728516, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.05237172171473503, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.911529064178467, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0074843461625278, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18490399420261383, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007115071057341993, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.539724349975586, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.002168291946873069, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.109777450561523, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05195530876517296, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.86505699157715, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0480070561170578, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.010297775268555, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0021961042657494545, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.618274688720703, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0077175572514534, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.107499122619629, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011345582082867622, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.110452651977539, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.07448416948318481, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.375205039978027, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.05388481914997101, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7069060802459717, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006850524339824915, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1512531340122223, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008338516345247626, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.388946533203125, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.010369345545768738, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.02493667602539, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.13141953945159912, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.70037269592285, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.10383348166942596, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.93909454345703, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.018692364916205406, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 13.96845817565918, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.16226662695407867, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.2284574508667, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.24402998387813568, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.039422988891602, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.15160495042800903, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.319926261901855, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.040018342435359955, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6171517372131348, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.025252051651477814, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14780670404434204, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.004819393157958984, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.26189613342285, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0013223597779870033, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.084327697753906, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.04251554235816002, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.884870529174805, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.038249291479587555, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.243213653564453, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0028273393400013447, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.641621589660645, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.02764587104320526, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.118905067443848, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.040406979620456696, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.507889747619629, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0674942135810852, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.196720123291016, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.03240145370364189, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 2.9991023540496826, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.04004433751106262, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1890658140182495, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.005457755643874407, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.081066131591797, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0034475282300263643, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.320173263549805, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05643065646290779, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.847640991210938, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.04473719373345375, "pnorm/_forward_module.model.norm.weight": 28.990018844604492, "gnorm/_forward_module.model.norm.weight": 0.005949206650257111, "pnorm/_forward_module.lm_head.weight": 217.01785278320312, "gnorm/_forward_module.lm_head.weight": 0.07953161001205444} +{"step": 943718400, "pnorm/_forward_module.model.embeddings.weight": 138.84921264648438, "gnorm/_forward_module.model.embeddings.weight": 0.07624714821577072, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.875213623046875, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0031460104510188103, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.798386573791504, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.010196392424404621, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.56687068939209, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.012099139392375946, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.061956405639648, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.10841397196054459, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.034480094909668, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10437111556529999, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0534472465515137, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00916266068816185, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3003646731376648, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0017530412878841162, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.143449783325195, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0020874300971627235, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.054874420166016, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06170782074332237, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.15726089477539, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07001261413097382, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.34522819519043, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0020450351294130087, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.967585563659668, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008602139540016651, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.82288646697998, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.012607133015990257, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.409636497497559, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07303415238857269, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.491878509521484, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06264852732419968, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3118090629577637, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011371572501957417, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1521574854850769, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009651107247918844, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.34918975830078, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011919132666662335, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.508615493774414, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.044933196157217026, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.455951690673828, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03935163468122482, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.654170989990234, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0016324358293786645, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.618616104125977, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00976874865591526, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.225708961486816, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013661352917551994, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.757463455200195, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06335407495498657, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.903708457946777, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.045463770627975464, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.920682191848755, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007150215562433004, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1858544498682022, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006556467269547284, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.54138946533203, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016074825543910265, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.12820816040039, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0451648011803627, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.880126953125, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03980864956974983, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.020584106445312, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017151037463918328, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.665728569030762, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007428342010825872, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.147782325744629, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.011506403796374798, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.114429473876953, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.060709718614816666, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.38184642791748, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04445919021964073, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7189066410064697, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006058309692889452, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15211966633796692, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007163059781305492, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.382266998291016, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005861154291778803, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.019508361816406, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08036880195140839, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.69823455810547, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07608520984649658, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.948331832885742, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.010756433941423893, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.000246047973633, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.09124147891998291, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.254637718200684, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.13794298470020294, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.055002212524414, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.14676719903945923, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.356669425964355, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03307241573929787, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6211512088775635, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014119943603873253, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14827148616313934, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0025893948040902615, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.266157150268555, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0012068506330251694, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.1107234954834, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03442693501710892, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.903644561767578, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.028321463614702225, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.261348724365234, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0016691071214154363, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.680490493774414, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.009269801899790764, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.147109031677246, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.013604335486888885, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.53183650970459, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04353237897157669, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.2379789352417, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.024037575349211693, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0096969604492188, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01194051280617714, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.18992476165294647, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0013074303278699517, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.091339111328125, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0017243754118680954, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.36115074157715, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.0374210923910141, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.874229431152344, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.031261712312698364, "pnorm/_forward_module.model.norm.weight": 29.09408187866211, "gnorm/_forward_module.model.norm.weight": 0.005774033721536398, "pnorm/_forward_module.lm_head.weight": 217.89305114746094, "gnorm/_forward_module.lm_head.weight": 0.05785857141017914} +{"step": 964689920, "pnorm/_forward_module.model.embeddings.weight": 139.07591247558594, "gnorm/_forward_module.model.embeddings.weight": 0.07028784602880478, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.87076187133789, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0027845825534313917, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.834098815917969, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007678710389882326, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.597390174865723, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.00864249374717474, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.051107406616211, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09591658413410187, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.023979187011719, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09282176941633224, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0551347732543945, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00551924854516983, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.30247342586517334, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006491534877568483, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.133930206298828, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0016389281954616308, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.05354881286621, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05343392863869667, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.156158447265625, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06181265786290169, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.33982276916504, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0015886547043919563, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.98274040222168, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007887713611125946, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.83408260345459, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010775907896459103, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.401453971862793, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06305572390556335, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.483895301818848, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05757890269160271, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.319146156311035, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008875452913343906, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15314261615276337, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006941997562535107, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.34961700439453, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010493416339159012, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.52386474609375, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.041883330792188644, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.466291427612305, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03540477529168129, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.655302047729492, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014208590146154165, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.639963150024414, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008231588639318943, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.240742683410645, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.012410277500748634, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.754780769348145, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05363842099905014, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.901590347290039, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04165550321340561, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9327147006988525, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008343932218849659, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1869235634803772, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000678669661283493, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.542905807495117, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0012342343106865883, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.145462036132812, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.041036248207092285, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.894533157348633, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.035484977066516876, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.030536651611328, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0014286070363596082, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.711682319641113, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006647253874689341, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.186254501342773, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009806735441088676, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.11785888671875, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05166175216436386, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.388224601745605, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03994278982281685, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7296149730682373, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005646468605846167, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15282922983169556, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0004459668416529894, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.37553596496582, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002794423373416066, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.013690948486328, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05249432101845741, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.69529151916504, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06250771135091782, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.958187103271484, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.00719974422827363, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.03244686126709, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0464463047683239, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.281266212463379, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.07107429206371307, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.069811820983887, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.13490496575832367, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.392578125, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.030409960076212883, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6252400875091553, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007678812835365534, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1485067456960678, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0011455094208940864, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.27158546447754, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010899900225922465, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.1375789642334, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.031992290169000626, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.92276382446289, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.027345510199666023, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.278623580932617, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0014427416026592255, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.714934349060059, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008447523228824139, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.172462463378906, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.014718485064804554, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.554821968078613, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.041622892022132874, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.277848243713379, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.022308126091957092, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0209076404571533, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007975267246365547, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1910637617111206, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0011165319010615349, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.10101890563965, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0014861759264022112, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.401168823242188, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.033463478088378906, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.899372100830078, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02892688848078251, "pnorm/_forward_module.model.norm.weight": 29.194299697875977, "gnorm/_forward_module.model.norm.weight": 0.0040795705281198025, "pnorm/_forward_module.lm_head.weight": 218.72731018066406, "gnorm/_forward_module.lm_head.weight": 0.05521472543478012} +{"step": 985661440, "pnorm/_forward_module.model.embeddings.weight": 139.2886199951172, "gnorm/_forward_module.model.embeddings.weight": 0.07668391615152359, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.865022659301758, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0032003733795136213, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.865357398986816, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008960888721048832, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.624307632446289, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010302674025297165, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.038922309875488, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11064145714044571, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 11.012149810791016, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.10355636477470398, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0570433139801025, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.007219790946692228, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.30455416440963745, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0013168640434741974, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.123552322387695, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0015921760350465775, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.0497989654541, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05815395340323448, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.154268264770508, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06552004814147949, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.333303451538086, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0020972895435988903, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 11.994985580444336, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00859068799763918, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.843391418457031, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.013676229864358902, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.391966819763184, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.07682494819164276, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.474723815917969, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06329452246427536, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3264646530151367, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010651466436684132, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15409380197525024, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.001258069067262113, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.350027084350586, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0013605802087113261, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.53759002685547, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04636695235967636, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.475919723510742, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03751422092318535, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.656238555908203, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0021541633177548647, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.659721374511719, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012756227515637875, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.254344940185547, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.020817425101995468, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.752202033996582, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06748413294553757, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.899840354919434, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04530300945043564, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9401068687438965, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008123235777020454, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18742074072360992, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006395771633833647, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.545907974243164, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001320989802479744, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.16341781616211, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04423747956752777, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.909122467041016, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03530231490731239, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.040157318115234, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0016309478087350726, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.75893497467041, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006875166669487953, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.22574234008789, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010927427560091019, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.120126724243164, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05977539345622063, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.393068313598633, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04201560840010643, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7401058673858643, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006810539402067661, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15362486243247986, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007151199970394373, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.36897087097168, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004183096811175346, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 28.007558822631836, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06222745403647423, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.69244956970215, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.059867363423109055, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.96744728088379, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.008287934586405754, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.061335563659668, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06021680682897568, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.30449104309082, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.08536843955516815, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.083989143371582, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12364578992128372, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.428215026855469, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03195783123373985, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6288344860076904, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010638413019478321, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14880721271038055, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0018912700470536947, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.27546501159668, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009797454113140702, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.160137176513672, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03304336220026016, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.939476013183594, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.028942806646227837, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.298486709594727, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0014759199693799019, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.75019645690918, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008108648471534252, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.197945594787598, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.013084894977509975, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.579996109008789, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04238774627447128, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.321454048156738, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02330756187438965, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0293281078338623, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.010195871815085411, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19167649745941162, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0014712996780872345, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.111074447631836, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012838125694543123, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.439342498779297, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03278152644634247, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.92447280883789, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.029324201866984367, "pnorm/_forward_module.model.norm.weight": 29.293825149536133, "gnorm/_forward_module.model.norm.weight": 0.005233574192970991, "pnorm/_forward_module.lm_head.weight": 219.51307678222656, "gnorm/_forward_module.lm_head.weight": 0.05025411397218704} +{"step": 1006632960, "pnorm/_forward_module.model.embeddings.weight": 139.487548828125, "gnorm/_forward_module.model.embeddings.weight": 0.08202654123306274, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.858976364135742, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.003164871595799923, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.895506858825684, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007995068095624447, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.65054988861084, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009360807947814465, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.026385307312012, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11553651839494705, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.999734878540039, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11148514598608017, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0582196712493896, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0063555436208844185, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3063834607601166, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006627185503020883, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.11319923400879, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001705798669718206, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.045583724975586, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06261274963617325, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.152156829833984, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.08009245246648788, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.327375411987305, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0022847603540867567, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.007720947265625, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008796798065304756, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.852749824523926, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.012606353498995304, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.382744789123535, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08068276941776276, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.465686798095703, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06942670047283173, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3324317932128906, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011321838945150375, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1550237536430359, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008850694284774363, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35182762145996, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014828506391495466, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.55220603942871, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05175565928220749, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.48614501953125, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.045449331402778625, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.656688690185547, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0023735351860523224, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.677657127380371, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.016884658485651016, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.267401695251465, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02776111476123333, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.74897575378418, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0697375237941742, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.897050857543945, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04853399097919464, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9460785388946533, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009139111265540123, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18808352947235107, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0012117435690015554, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.54708480834961, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001501772552728653, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.17810821533203, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.05009433254599571, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.921483993530273, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.045857399702072144, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.04958152770996, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017990035703405738, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.804372787475586, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008582771755754948, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.264162063598633, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.012497692368924618, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.12248706817627, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06616392731666565, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.398228645324707, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04907340556383133, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7481887340545654, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.009863453917205334, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15420614182949066, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00133492739405483, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.361860275268555, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0065680802799761295, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.998058319091797, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.09098481386899948, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.687936782836914, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.09313787519931793, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.976865768432617, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.012751942500472069, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.089095115661621, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.09690000861883163, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.326981544494629, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.1498832106590271, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.09833812713623, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.17308129370212555, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.464064598083496, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.037693511694669724, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6324639320373535, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.015360482037067413, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1491069346666336, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0025777288246899843, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.2786865234375, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0011260703904554248, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.180788040161133, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03884003311395645, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.955284118652344, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.03283848240971565, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.318437576293945, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001918974332511425, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.784006118774414, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.017630575224757195, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.223045349121094, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02662418596446514, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.605544090270996, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.058288127183914185, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.365303039550781, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.029117580503225327, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0392916202545166, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.027073437348008156, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19249887764453888, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0035678015556186438, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.12173843383789, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0031394329853355885, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.477251052856445, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.05264545977115631, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.949861526489258, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.044774238020181656, "pnorm/_forward_module.model.norm.weight": 29.391998291015625, "gnorm/_forward_module.model.norm.weight": 0.00413890415802598, "pnorm/_forward_module.lm_head.weight": 220.2489471435547, "gnorm/_forward_module.lm_head.weight": 0.0693630799651146} +{"step": 1027604480, "pnorm/_forward_module.model.embeddings.weight": 139.6724090576172, "gnorm/_forward_module.model.embeddings.weight": 0.06637056171894073, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.85521125793457, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0026198329869657755, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.928074836730957, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.00786060094833374, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.678533554077148, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009697912260890007, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.016483306884766, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08897929638624191, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.99013614654541, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08761727064847946, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0597028732299805, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006456926930695772, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.30845749378204346, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006942301406525075, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.104955673217773, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001432037097401917, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.044157028198242, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0469973161816597, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.151172637939453, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05353163182735443, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.322050094604492, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0016109725693240762, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.022160530090332, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008073038421571255, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.863899230957031, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010826574638485909, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.374053955078125, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.061457615345716476, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.457193374633789, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05669055134057999, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.336965322494507, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009808522649109364, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1557633876800537, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000804198207333684, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35203742980957, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010569911682978272, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.563823699951172, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04054016247391701, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.494369506835938, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03257935121655464, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.657363891601562, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0016886218218132854, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.69518756866455, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011359715834259987, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.280423164367676, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01869390346109867, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.746081352233887, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.052904535084962845, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.894645690917969, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04003902152180672, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9523768424987793, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008204164914786816, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18876434862613678, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009876098483800888, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.549205780029297, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001096532680094242, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.19305419921875, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03915577009320259, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.93367576599121, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.030995361506938934, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.05843162536621, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0013426964869722724, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.846090316772461, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006014530546963215, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.299057006835938, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009035948663949966, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.124900817871094, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.04838737100362778, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.403033256530762, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03747650608420372, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.756225109100342, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006279331166297197, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.154885396361351, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007857793243601918, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.3558349609375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002433867659419775, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.99136734008789, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04852188006043434, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.685016632080078, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.0504339262843132, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.985698699951172, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0053016506135463715, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.116652488708496, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.040092941373586655, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.348920822143555, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05807730183005333, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.110468864440918, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10508173704147339, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.49388313293457, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.028480324894189835, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6360809803009033, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006032820791006088, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14939263463020325, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0005252675036899745, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.281238555908203, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009190381970256567, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.199501037597656, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.030934356153011322, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.96921730041504, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.026251111179590225, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.335908889770508, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0012471135705709457, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.814128875732422, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008840913884341717, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.244834899902344, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.014404053799808025, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.629244804382324, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0355646014213562, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.40571403503418, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.020740680396556854, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0484771728515625, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015096310526132584, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19318434596061707, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0019023859640583396, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.129314422607422, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013083693338558078, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.5081787109375, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03108375333249569, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.971071243286133, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.028177892789244652, "pnorm/_forward_module.model.norm.weight": 29.48297882080078, "gnorm/_forward_module.model.norm.weight": 0.0033662368077784777, "pnorm/_forward_module.lm_head.weight": 220.94615173339844, "gnorm/_forward_module.lm_head.weight": 0.049628376960754395} +{"step": 1048576000, "pnorm/_forward_module.model.embeddings.weight": 139.84425354003906, "gnorm/_forward_module.model.embeddings.weight": 0.06924313306808472, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.849884033203125, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002932822797447443, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.957304000854492, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008671816438436508, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.704094886779785, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010107116773724556, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 11.005009651184082, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09690851718187332, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.979009628295898, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09618823230266571, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0617151260375977, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.008423103019595146, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.31050339341163635, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0014706659130752087, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.095823287963867, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0016583887627348304, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.04048728942871, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05127527937293053, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.14910888671875, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0640282928943634, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.316686630249023, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0018589666578918695, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.034772872924805, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007925018668174744, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.873258590698242, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010894826613366604, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.365485191345215, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06369946897029877, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.448673248291016, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06030477210879326, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.342480182647705, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009126916527748108, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15668606758117676, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006818855181336403, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.353500366210938, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012267276179045439, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.57660484313965, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04333744943141937, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.50357437133789, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03876384720206261, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.65894889831543, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0016904508229345083, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.712225914001465, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010549033991992474, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.292105674743652, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016025898978114128, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.74414348602295, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05743277072906494, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.893017768859863, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0441085621714592, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.959207773208618, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008659404702484608, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1893964409828186, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009225074900314212, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.551597595214844, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001777914701960981, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.207677841186523, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.045367415994405746, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.946067810058594, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.040166568011045456, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.067049026489258, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017212328966706991, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.887012481689453, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007608877960592508, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.333354949951172, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010842240415513515, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.127347946166992, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0594380646944046, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.40748405456543, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.045160941779613495, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7619638442993164, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006605206523090601, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15545381605625153, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006917466525919735, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.350088119506836, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.008929718285799026, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.98515510559082, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.10872479528188705, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.682205200195312, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.087520070374012, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.993759155273438, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.015413915738463402, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.144143104553223, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.136297345161438, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.37141227722168, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.20360393822193146, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.122781753540039, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.09911473840475082, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.524133682250977, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.031149419024586678, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.639110565185547, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.018066557124257088, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14967048168182373, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0034967479296028614, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.283578872680664, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0011935734655708075, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.21709442138672, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03468415513634682, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.982534408569336, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02825179696083069, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.35418701171875, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0015224748058244586, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.84285831451416, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.010665920563042164, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.265986442565918, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.017186380922794342, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.653043746948242, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04091382399201393, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.446565628051758, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.02193254977464676, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.054612159729004, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0109529634937644, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19377201795578003, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0014103680150583386, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.137601852416992, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0016478314064443111, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.53940200805664, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.035671498626470566, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 20.992122650146484, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03232651576399803, "pnorm/_forward_module.model.norm.weight": 29.573715209960938, "gnorm/_forward_module.model.norm.weight": 0.005862156394869089, "pnorm/_forward_module.lm_head.weight": 221.61390686035156, "gnorm/_forward_module.lm_head.weight": 0.06280854344367981} +{"step": 1069547520, "pnorm/_forward_module.model.embeddings.weight": 140.00381469726562, "gnorm/_forward_module.model.embeddings.weight": 0.059206489473581314, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.84528350830078, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0021452675573527813, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 14.986587524414062, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007748179137706757, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.729864120483398, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008741500787436962, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.994086265563965, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.07807821780443192, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.968427658081055, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07737798243761063, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.063023090362549, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005542707163840532, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.31211745738983154, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006506852223537862, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.08713722229004, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0011353417066857219, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.037187576293945, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.042963698506355286, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.14740562438965, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.04417276009917259, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.31197166442871, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001315577421337366, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.047471046447754, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006785301491618156, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.883245468139648, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009517776779830456, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.357666969299316, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05220552533864975, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.441051483154297, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04720500856637955, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3459794521331787, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0085243945941329, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15711236000061035, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010079871863126755, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35409927368164, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009226043475791812, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.587289810180664, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03480108082294464, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.511329650878906, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02752545289695263, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.65998077392578, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0012771766632795334, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.729515075683594, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007674704305827618, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.304014205932617, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.011065463535487652, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.741445541381836, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.043221235275268555, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.890974044799805, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.033897269517183304, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9626033306121826, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006020127795636654, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1895192563533783, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0003803552535828203, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.553688049316406, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009518302977085114, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.221162796020508, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03357497602701187, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.957557678222656, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.027228349819779396, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.07465934753418, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0011154384119436145, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.923624992370605, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005617361515760422, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.364293098449707, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008059922605752945, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.129302024841309, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03908143937587738, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.411881446838379, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03187764063477516, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7677037715911865, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004308291245251894, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15588678419589996, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002910517214331776, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.345340728759766, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0023590964265167713, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.979209899902344, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04427013546228409, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.679227828979492, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04347294569015503, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 23.999900817871094, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.00519893504679203, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.167470932006836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.040054526180028915, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.390833854675293, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05669739469885826, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.134101867675781, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.08577784150838852, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.551108360290527, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.025900116190314293, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6422805786132812, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006039837840944529, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.14992286264896393, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0007434178842231631, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.28639030456543, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0008732025744393468, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.234466552734375, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.028394581750035286, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 20.995880126953125, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.025867855176329613, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.372745513916016, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0014141695573925972, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.872519493103027, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.013680456206202507, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.287510871887207, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.022973693907260895, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.67602252960205, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03291356563568115, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.486618995666504, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.019865863025188446, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.061497211456299, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.026225514709949493, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19451096653938293, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.003476930083706975, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.146425247192383, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013448139652609825, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.5703182220459, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.030642878264188766, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.013200759887695, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.026525191962718964, "pnorm/_forward_module.model.norm.weight": 29.662708282470703, "gnorm/_forward_module.model.norm.weight": 0.0033500581048429012, "pnorm/_forward_module.lm_head.weight": 222.24252319335938, "gnorm/_forward_module.lm_head.weight": 0.05115242302417755} +{"step": 1090519040, "pnorm/_forward_module.model.embeddings.weight": 140.15133666992188, "gnorm/_forward_module.model.embeddings.weight": 0.07922381907701492, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.841238021850586, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0031614152248948812, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.015802383422852, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008203941397368908, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.755172729492188, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.010147161781787872, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.983978271484375, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.11816585063934326, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.958621978759766, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.11099910736083984, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.064547300338745, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006918351165950298, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3139130175113678, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010512734297662973, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.078943252563477, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0017766974633559585, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.034521102905273, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.06385936588048935, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.145845413208008, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.07658208906650543, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.306766510009766, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0019541517831385136, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.059471130371094, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.008407048881053925, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.892735481262207, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011816577985882759, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.349262237548828, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.08215217292308807, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.433116912841797, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06828607618808746, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.349006414413452, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009299427270889282, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15771782398223877, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007032426074147224, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35382652282715, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0014264181954786181, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.596126556396484, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.05161890387535095, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.51765251159668, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04407934844493866, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.660720825195312, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.002187730511650443, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.743623733520508, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012376836501061916, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.313630104064941, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.017311103641986847, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.738736152648926, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.07231032848358154, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.888677597045898, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04724093899130821, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.966099262237549, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.01121920719742775, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.18980522453784943, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009647986735217273, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.55464744567871, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0016695949016138911, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.23238754272461, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04974454641342163, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.96778678894043, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04319551959633827, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.082637786865234, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0020019873045384884, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.95877742767334, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.008429250679910183, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.394302368164062, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.012955213896930218, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.131775856018066, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.06877273321151733, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.416582107543945, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04573037475347519, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7740440368652344, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008134991861879826, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1565076857805252, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007290109060704708, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.339275360107422, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.006424775812774897, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.970130920410156, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08891242742538452, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.67437171936035, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08755398541688919, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.007587432861328, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011972494423389435, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.19236946105957, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.10034631937742233, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.41141128540039, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.15714915096759796, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.146242141723633, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1556464433670044, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.580925941467285, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03478928655385971, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6434123516082764, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014257240109145641, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15001176297664642, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002584763802587986, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.288318634033203, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0011585642350837588, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.249711990356445, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03543228656053543, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.008056640625, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.029567334800958633, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.39065170288086, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0014812819426879287, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.901124954223633, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.007057493552565575, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.308096885681152, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011616470292210579, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.69852352142334, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04603620991110802, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.524603843688965, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.025378182530403137, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.069857597351074, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.004654102958738804, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1951240599155426, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0005629057995975018, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.15358543395996, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0018903726013377309, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.597644805908203, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03797883167862892, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.031841278076172, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03261088207364082, "pnorm/_forward_module.model.norm.weight": 29.747772216796875, "gnorm/_forward_module.model.norm.weight": 0.00526442751288414, "pnorm/_forward_module.lm_head.weight": 222.83094787597656, "gnorm/_forward_module.lm_head.weight": 0.05147223919630051} +{"step": 1111490560, "pnorm/_forward_module.model.embeddings.weight": 140.2875213623047, "gnorm/_forward_module.model.embeddings.weight": 0.07013580948114395, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.837039947509766, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0025237819645553827, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.044673919677734, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008316789753735065, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.780292510986328, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009410723112523556, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.973408699035645, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09586941450834274, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.948168754577637, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09560512006282806, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0659072399139404, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006793656852096319, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.31534314155578613, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001306528109125793, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.071001052856445, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0017664078623056412, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.031444549560547, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05343272164463997, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.143573760986328, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.06548408418893814, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.302040100097656, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0017919084057211876, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.071805000305176, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.009193847887217999, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.90208911895752, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.011363324709236622, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.341365814208984, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06560688465833664, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.42529296875, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.06156325712800026, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3528308868408203, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.011203402653336525, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15824754536151886, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0013600373640656471, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35552978515625, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0012606850359588861, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.60682487487793, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.046089235693216324, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.525503158569336, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.04002489894628525, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66216278076172, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0024269616696983576, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.759711265563965, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01789938658475876, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.325387001037598, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.02803194336593151, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.735997200012207, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.06130155920982361, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.886418342590332, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0449640154838562, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.973254919052124, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009551696479320526, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19050060212612152, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0010942243970930576, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.556612014770508, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0015252988087013364, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.244375228881836, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04529449716210365, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.978147506713867, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.04016077518463135, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.09061050415039, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0017103116260841489, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 13.994242668151855, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007897221483290195, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.42388916015625, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.01092718355357647, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.13410758972168, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05822041258215904, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.420636177062988, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04463972896337509, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7801096439361572, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.008207290433347225, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15727290511131287, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0011127183679491282, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.33422088623047, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.008049838244915009, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.96333885192871, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.1026243343949318, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.671886444091797, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08511684089899063, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.015247344970703, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.013341660611331463, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.215439796447754, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.1259041130542755, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.429594993591309, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.1878938525915146, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.156519889831543, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11099589616060257, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.6065034866333, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03015989065170288, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6455416679382324, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.015306876040995121, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15014703571796417, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002756267786026001, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.2904109954834, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0011002789251506329, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.264053344726562, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03286460414528847, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.01937484741211, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.028024185448884964, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.40725326538086, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001493872026912868, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.926856994628906, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.01259611640125513, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.327155113220215, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.020150810480117798, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.720136642456055, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04059341922402382, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.560823440551758, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.022278236225247383, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0772178173065186, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.019622892141342163, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19582346081733704, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0025265810545533895, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.16101837158203, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0019724464509636164, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.624771118164062, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03588184714317322, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.04960060119629, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.03108914941549301, "pnorm/_forward_module.model.norm.weight": 29.828536987304688, "gnorm/_forward_module.model.norm.weight": 0.0037915001157671213, "pnorm/_forward_module.lm_head.weight": 223.3860626220703, "gnorm/_forward_module.lm_head.weight": 0.0574253611266613} +{"step": 1132462080, "pnorm/_forward_module.model.embeddings.weight": 140.41250610351562, "gnorm/_forward_module.model.embeddings.weight": 0.07005149126052856, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.832460403442383, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002700255950912833, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.071003913879395, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008405433036386967, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.803256034851074, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.011376790702342987, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.96272087097168, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.096554234623909, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.93791389465332, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.09428264945745468, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.067250967025757, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006935805082321167, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3169975280761719, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.001108950818888843, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.062971115112305, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0014956947416067123, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.027990341186523, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05210975930094719, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.14151954650879, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.058231186121702194, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.29786491394043, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0017995196394622326, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.085312843322754, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007773655001074076, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.912480354309082, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010244077071547508, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.333538055419922, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06472166627645493, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.41759967803955, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05759293586015701, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3558666706085205, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008852451108396053, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15881334245204926, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007335864356718957, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35672378540039, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001103862188756466, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.616374969482422, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.042299337685108185, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.532445907592773, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.035978421568870544, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66423225402832, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001750954077579081, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.777522087097168, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010396602563560009, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.338258743286133, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01565021276473999, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.733648300170898, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.057859763503074646, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.884726524353027, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.04223340377211571, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9772961139678955, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.011175453662872314, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19068792462348938, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0010026647942140698, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.557971954345703, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014360827626660466, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.25456428527832, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04287739843130112, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.987411499023438, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03668570518493652, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.0987606048584, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0015515448758378625, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.028903007507324, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006710642483085394, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.453722953796387, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009555593132972717, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.1367826461792, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.053337499499320984, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.425265312194824, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.04066028445959091, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7843172550201416, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006592963822185993, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1575927734375, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0009665488614700735, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.3295955657959, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004914171062409878, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.956836700439453, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0707348883152008, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.669090270996094, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07149539887905121, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.02239990234375, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.009273354895412922, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.238860130310059, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.07497520744800568, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.448932647705078, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.11690657585859299, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.167336463928223, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.133189857006073, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.631538391113281, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.03181123360991478, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6487135887145996, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.011915693990886211, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15036359429359436, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0020220731385052204, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292638778686523, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009897155687212944, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.277585983276367, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.033604398369789124, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.03029441833496, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.028782380744814873, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.42422103881836, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0015594623982906342, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.952001571655273, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.01136582251638174, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.345212936401367, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.019411150366067886, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.741652488708496, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.04494774714112282, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.597099304199219, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.024063998833298683, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.085348129272461, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.009353731758892536, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1965925097465515, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0012521554017439485, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.167526245117188, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0020771340932697058, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.64804458618164, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.041973527520895004, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.066171646118164, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.035456135869026184, "pnorm/_forward_module.model.norm.weight": 29.90931510925293, "gnorm/_forward_module.model.norm.weight": 0.003897160990163684, "pnorm/_forward_module.lm_head.weight": 223.91432189941406, "gnorm/_forward_module.lm_head.weight": 0.0787363052368164} +{"step": 1153433600, "pnorm/_forward_module.model.embeddings.weight": 140.52688598632812, "gnorm/_forward_module.model.embeddings.weight": 0.06542657315731049, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.8276309967041, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0023350240662693977, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.095189094543457, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007055103313177824, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.824127197265625, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008356685750186443, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.952077865600586, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.09232486039400101, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.927433013916016, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.0894157886505127, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.068364381790161, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00553342467173934, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3184730112552643, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005585517501458526, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.05598258972168, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0016690699849277735, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.025575637817383, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04894594848155975, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.13996696472168, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05593087896704674, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.29285430908203, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0015981782926246524, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.09516429901123, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00738826859742403, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.920215606689453, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.00949106551706791, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.325562477111816, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06250336021184921, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.409852981567383, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.054987456649541855, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.358421564102173, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008650490082800388, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15916350483894348, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006118080927990377, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.35833168029785, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001219879719428718, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.626028060913086, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.042713407427072525, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.539480209350586, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.034278854727745056, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66520118713379, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001681598019786179, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.791035652160645, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010977651923894882, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.34778881072998, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016045769676566124, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.73039722442627, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05610594525933266, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.88193416595459, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.038847435265779495, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.983114242553711, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.009384261444211006, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1912103295326233, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009904255857691169, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.5594425201416, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0011901938123628497, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.264535903930664, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03918493539094925, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 19.996305465698242, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.033191829919815063, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.106124877929688, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0013859064783900976, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.060755729675293, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.007188094779849052, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.481351852416992, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.010476584546267986, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.138773918151855, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.047670308500528336, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.429427146911621, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03657669574022293, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7901158332824707, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0062392158433794975, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15800878405570984, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0007443256326951087, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.32451057434082, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004150868859142065, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.95003318786621, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06020720675587654, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.666358947753906, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06184646487236023, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.02960205078125, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007746866438537836, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.260727882385254, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06404006481170654, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.466926574707031, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.09898947924375534, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.177450180053711, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11513212323188782, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.656038284301758, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02886536903679371, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6502676010131836, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00948623102158308, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1504761129617691, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.001400056411512196, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29306983947754, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009231743752025068, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.287900924682617, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.030231960117816925, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.03913688659668, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.027671104297041893, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.44093894958496, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0015845431480556726, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.975945472717285, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.014731072820723057, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.3623628616333, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.023638099431991577, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.76258373260498, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0376235730946064, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.632291793823242, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.021287458017468452, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.0913822650909424, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.031196292489767075, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19708634912967682, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0039021812845021486, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.174711227416992, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.001524546998552978, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.672576904296875, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.0351446159183979, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.083084106445312, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.030639000236988068, "pnorm/_forward_module.model.norm.weight": 29.987319946289062, "gnorm/_forward_module.model.norm.weight": 0.004556950647383928, "pnorm/_forward_module.lm_head.weight": 224.4150848388672, "gnorm/_forward_module.lm_head.weight": 0.057013604789972305} +{"step": 1174405120, "pnorm/_forward_module.model.embeddings.weight": 140.63111877441406, "gnorm/_forward_module.model.embeddings.weight": 0.06350483745336533, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.823081970214844, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0023480236995965242, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.118627548217773, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006803302094340324, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.844606399536133, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007823570631444454, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.9418306350708, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08600746840238571, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.917404174804688, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.0824626088142395, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0703773498535156, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0054673501290380955, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.31995582580566406, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0002979365235660225, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.050016403198242, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0013084125239402056, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.02397918701172, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0466214083135128, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.138792037963867, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.054467473179101944, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.288494110107422, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0014926716685295105, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.106168746948242, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007602308876812458, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.928791999816895, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009807465597987175, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.318020820617676, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.056647345423698425, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.402556419372559, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05159002169966698, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3613884449005127, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010095944628119469, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.15968014299869537, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003868946514558047, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.359798431396484, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010853500571101904, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.634817123413086, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03873531147837639, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.5457706451416, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03279959037899971, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.666156768798828, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001767151989042759, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.804082870483398, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010467530228197575, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.357304573059082, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.017300723120570183, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.727706909179688, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.052001748234033585, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.87964153289795, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.0378662645816803, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9844725131988525, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007691247388720512, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19111543893814087, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007603327976539731, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.56175994873047, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001080596586689353, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.275146484375, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.037008874118328094, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.00554084777832, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03217816725373268, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.112802505493164, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001278562587685883, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.09032154083252, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005765886977314949, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.506616592407227, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008396543562412262, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.140140533447266, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.04523066058754921, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.43287467956543, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.036358170211315155, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.793832302093506, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004778907168656588, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15839727222919464, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005801309016533196, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.31954002380371, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0017684909980744123, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.943429946899414, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04153585061430931, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.663700103759766, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.058267101645469666, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.035993576049805, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.005280831828713417, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.280735969543457, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.030128300189971924, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.483148574829102, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04254244267940521, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.186250686645508, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12592770159244537, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.676959991455078, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.028317324817180634, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6525418758392334, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007228670176118612, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15067847073078156, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0009971370454877615, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.295989990234375, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009439450222998857, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.300559997558594, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03108772076666355, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.049409866333008, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.026877041906118393, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.456787109375, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0013797342544421554, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 13.998513221740723, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.012155037373304367, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.378240585327148, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01951027289032936, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.782970428466797, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03889621049165726, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.666496276855469, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.021720662713050842, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.096707344055176, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.018112648278474808, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19759953022003174, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.002550828969106078, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.18120765686035, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013745242031291127, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.695024490356445, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03351172059774399, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.098674774169922, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.028699547052383423, "pnorm/_forward_module.model.norm.weight": 30.06251335144043, "gnorm/_forward_module.model.norm.weight": 0.002978721633553505, "pnorm/_forward_module.lm_head.weight": 224.88365173339844, "gnorm/_forward_module.lm_head.weight": 0.05117204412817955} +{"step": 1195376640, "pnorm/_forward_module.model.embeddings.weight": 140.725830078125, "gnorm/_forward_module.model.embeddings.weight": 0.06494330614805222, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.819473266601562, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002459406852722168, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.14330005645752, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0069696721620857716, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.866477966308594, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0077680968679487705, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.932450294494629, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0911393016576767, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.908172607421875, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.0884045660495758, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0714809894561768, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005528942681849003, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.32155388593673706, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005000746459700167, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.043289184570312, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0013527829432860017, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.02115249633789, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.05034896358847618, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.13680648803711, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05612659826874733, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.283655166625977, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001525079132989049, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.115068435668945, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00812515802681446, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.935296058654785, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010698404163122177, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.310400009155273, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.061073243618011475, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.394925117492676, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0538199208676815, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.365161895751953, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009302590973675251, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16012662649154663, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005163921159692109, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.360118865966797, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011881417594850063, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.64143943786621, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.041416559368371964, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.55055046081543, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03439434990286827, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.66729164123535, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017167292535305023, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.81755542755127, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010764461010694504, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.367280006408691, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015863537788391113, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.725038528442383, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05638404190540314, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.877326011657715, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.039018187671899796, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9865124225616455, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006826121360063553, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19134050607681274, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00038134498754516244, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.563236236572266, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001210696529597044, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.283723831176758, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.038872286677360535, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.013227462768555, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.033985208719968796, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.120872497558594, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001242728321813047, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.12183952331543, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006306661292910576, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.53380298614502, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009291821159422398, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.142870903015137, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.047643691301345825, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.437219619750977, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03793969377875328, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.7968833446502686, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.006391364615410566, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15865860879421234, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008067268645390868, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.314659118652344, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0034443815238773823, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.936071395874023, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05358745902776718, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.660324096679688, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06263459473848343, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.042970657348633, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.006755499634891748, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.301055908203125, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.05289757251739502, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.499591827392578, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.07540543377399445, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.194709777832031, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12856218218803406, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.697431564331055, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02826029248535633, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.655552864074707, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008208603598177433, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15095531940460205, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0013492002617567778, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.297107696533203, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009376388043165207, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.309925079345703, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.030241655185818672, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.05777931213379, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.025929324328899384, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.47197151184082, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0012892205268144608, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.020493507385254, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.009652532637119293, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.393706321716309, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.015605742111802101, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.803006172180176, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.0360952690243721, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.699627876281738, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01996314711868763, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1017580032348633, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013842535205185413, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1981411874294281, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0017976779490709305, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.187320709228516, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013340591685846448, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.71595001220703, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.03081437386572361, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.112850189208984, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.028515635058283806, "pnorm/_forward_module.model.norm.weight": 30.13469886779785, "gnorm/_forward_module.model.norm.weight": 0.00422003073617816, "pnorm/_forward_module.lm_head.weight": 225.32162475585938, "gnorm/_forward_module.lm_head.weight": 0.04920729994773865} +{"step": 1216348160, "pnorm/_forward_module.model.embeddings.weight": 140.8114471435547, "gnorm/_forward_module.model.embeddings.weight": 0.0626826286315918, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.815256118774414, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0023456683848053217, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.165800094604492, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.00826957169920206, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.886083602905273, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009887191466987133, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.922578811645508, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08878620713949203, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.89871597290039, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08449744433164597, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0732061862945557, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.007411227561533451, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.32296451926231384, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0013828517403453588, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.03859519958496, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0012895704712718725, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.020580291748047, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04802636802196503, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.136234283447266, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.058734022080898285, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.278989791870117, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0017537750536575913, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.123615264892578, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007729469332844019, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.941476821899414, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009528870694339275, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.302790641784668, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06211109831929207, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.387563705444336, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05310742184519768, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.367689609527588, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008299448527395725, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16065849363803864, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000878406222909689, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.362092971801758, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011105082230642438, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.649892807006836, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.04063405841588974, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.556821823120117, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03552047163248062, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.668930053710938, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0016897948225960135, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.831611633300781, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009376207366585732, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.377381324768066, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013738220557570457, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.722373962402344, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05798395350575447, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.875232696533203, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.037252478301525116, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9894704818725586, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007744812406599522, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19165392220020294, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006725696730427444, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.56380844116211, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0015269556315615773, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.290443420410156, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03905156999826431, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.019821166992188, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03532673791050911, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.127216339111328, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001415669801644981, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.149659156799316, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005935473833233118, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.557770729064941, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009271674789488316, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.143608093261719, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.050097595900297165, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.439607620239258, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03682173416018486, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8017823696136475, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005373160354793072, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15917734801769257, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005838232464157045, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.310226440429688, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005881119053810835, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.929418563842773, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.07551079988479614, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.657411575317383, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07536865770816803, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.048992156982422, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011006909422576427, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.320734977722168, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0923554077744484, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.515235900878906, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.1441965103149414, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.203204154968262, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11370545625686646, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.717062950134277, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.026641616597771645, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6572399139404297, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.011420720256865025, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1510688066482544, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002109326422214508, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.298248291015625, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010590213350951672, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.31850242614746, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02893798239529133, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.06535530090332, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.023651868104934692, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.48729705810547, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011939649702981114, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.041974067687988, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00638969661667943, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.409256935119629, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.010127800516784191, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.822553634643555, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03357662260532379, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.732038497924805, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01874340884387493, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.107903242111206, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007491940166801214, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.1986711323261261, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009266718989238143, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.193706512451172, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012013771338388324, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.736635208129883, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.028044624254107475, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.12766456604004, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.025445064529776573, "pnorm/_forward_module.model.norm.weight": 30.205249786376953, "gnorm/_forward_module.model.norm.weight": 0.00436502555385232, "pnorm/_forward_module.lm_head.weight": 225.7384490966797, "gnorm/_forward_module.lm_head.weight": 0.04454909265041351} +{"step": 1237319680, "pnorm/_forward_module.model.embeddings.weight": 140.88856506347656, "gnorm/_forward_module.model.embeddings.weight": 0.0634930357336998, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.811906814575195, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002276252256706357, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.187747955322266, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.008500587195158005, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.905606269836426, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009774391539394855, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.913702011108398, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08863546699285507, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.889825820922852, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08559013903141022, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.073880910873413, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006835530046373606, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.32437756657600403, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0018297643400728703, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.032623291015625, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0014122406719252467, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.01785659790039, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.048472434282302856, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.134231567382812, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05845370143651962, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.27420997619629, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0018561139004305005, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.131107330322266, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007951578125357628, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.947081565856934, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.01031598262488842, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.295242309570312, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06294641643762589, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.379984855651855, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05420217290520668, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3691065311431885, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.010948838666081429, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1609305888414383, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010666393209248781, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.36431312561035, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0010320116998627782, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.658199310302734, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.040178991854190826, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.562763214111328, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.036282509565353394, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.670513153076172, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0018190190894529223, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.845538139343262, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.01257126871496439, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.387794494628906, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.019655248150229454, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.719339370727539, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05515744164586067, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.872395515441895, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.039537880569696426, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.992725133895874, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007054134272038937, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19196820259094238, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00070453982334584, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.56476593017578, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014397967606782913, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.297433853149414, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.04111970588564873, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.026233673095703, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03698442503809929, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.133636474609375, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0014907352160662413, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.17672061920166, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005961145740002394, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.581433296203613, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009039917029440403, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.144224166870117, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.054937947541475296, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.441853523254395, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.040717776864767075, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8075358867645264, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005290582776069641, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1596459001302719, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006201759679242969, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.30588150024414, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.006508999038487673, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.922176361083984, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08556639403104782, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.6541748046875, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08161702752113342, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.054922103881836, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011806984432041645, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.339011192321777, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.10433805733919144, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.530226707458496, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.15424844622612, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.2113037109375, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11772645264863968, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.736613273620605, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.028383223339915276, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.658351182937622, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.01260099932551384, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15111024677753448, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002273100893944502, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.298852920532227, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009777629747986794, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.326156616210938, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03105182573199272, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.07256507873535, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.025182297453284264, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.502708435058594, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011959399562329054, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.063056945800781, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00811985693871975, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.42474365234375, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.012593698687851429, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.841636657714844, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.035285912454128265, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.763352394104004, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.019012536853551865, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1150004863739014, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.011600450612604618, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19927240908145905, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0011927419109269977, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.19883155822754, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0010952370939776301, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.75434112548828, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02943258173763752, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.140674591064453, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.0256598312407732, "pnorm/_forward_module.model.norm.weight": 30.274080276489258, "gnorm/_forward_module.model.norm.weight": 0.004100180696696043, "pnorm/_forward_module.lm_head.weight": 226.13233947753906, "gnorm/_forward_module.lm_head.weight": 0.04004082828760147} +{"step": 1258291200, "pnorm/_forward_module.model.embeddings.weight": 140.9578094482422, "gnorm/_forward_module.model.embeddings.weight": 0.06063740700483322, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.808208465576172, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0021605438087135553, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.208089828491211, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007497465703636408, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.923527717590332, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008646723814308643, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.904733657836914, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0874607115983963, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.880937576293945, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08318505436182022, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.075662612915039, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006078120321035385, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.32572269439697266, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0005800988292321563, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.027965545654297, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001282096141949296, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.016193389892578, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04720378667116165, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.132919311523438, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.055415183305740356, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.270435333251953, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0015385064762085676, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.14002513885498, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006994299124926329, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.953818321228027, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009982168674468994, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.288522720336914, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05948098376393318, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.373271942138672, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.052878957241773605, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3705997467041016, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007546186447143555, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16129229962825775, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00040453753899782896, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.366878509521484, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001112981466576457, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.666690826416016, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03968862444162369, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.568706512451172, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03357897698879242, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.6713924407959, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014042711118236184, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.856966018676758, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008254819549620152, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.395910263061523, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.012137032113969326, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.71658706665039, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05386349931359291, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.869951248168945, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.037743423134088516, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9933462142944336, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00630616070702672, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19193291664123535, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00047271366929635406, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.566991806030273, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0011039265664294362, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.305862426757812, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03850811347365379, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.03369903564453, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03371669724583626, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.139089584350586, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0013204512652009726, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.20042610168457, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005356659647077322, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.601859092712402, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008685889653861523, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14430046081543, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.049681346863508224, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.443320274353027, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03789716958999634, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.812361478805542, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004759907256811857, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.15999598801136017, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0004021568747702986, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.30185317993164, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0038950382731854916, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.915987014770508, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05837811157107353, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.651830673217773, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06660016626119614, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.06048011779785, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007481568027287722, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.357412338256836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.061752352863550186, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.545580863952637, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.08818324655294418, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.218530654907227, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.12499909847974777, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.75312328338623, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.029026515781879425, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.658860683441162, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.009001716040074825, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1511540412902832, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0013007082743570209, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.300247192382812, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009669375140219927, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.33449363708496, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.03064950555562973, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.080001831054688, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.029021743685007095, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.517377853393555, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0015840606065467, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.08360767364502, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.016498740762472153, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.439476013183594, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.027810007333755493, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.859371185302734, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03765421733260155, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.792799949645996, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.020643655210733414, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1188695430755615, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.028147898614406586, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19968239963054657, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.003798685036599636, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.20530128479004, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012400287669152021, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.77415657043457, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.029973367229104042, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.15461540222168, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.025917628780007362, "pnorm/_forward_module.model.norm.weight": 30.339509963989258, "gnorm/_forward_module.model.norm.weight": 0.0035744858905673027, "pnorm/_forward_module.lm_head.weight": 226.49708557128906, "gnorm/_forward_module.lm_head.weight": 0.03992973640561104} +{"step": 1279262720, "pnorm/_forward_module.model.embeddings.weight": 141.0197296142578, "gnorm/_forward_module.model.embeddings.weight": 0.059265125542879105, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.804580688476562, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00214063236489892, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.228760719299316, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007351779378950596, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.941893577575684, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008539681322872639, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.895759582519531, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08555971086025238, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.872016906738281, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08264566957950592, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.076246976852417, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005955498665571213, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.326959490776062, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009867704939097166, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.023134231567383, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0012404591543599963, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.01430320739746, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04572124034166336, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.13139533996582, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05598977953195572, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.266578674316406, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0016678695101290941, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.148319244384766, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007479813881218433, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.960192680358887, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009319275617599487, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.281804084777832, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.060610514134168625, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.366503715515137, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05343422293663025, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3710978031158447, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008580000139772892, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16148562729358673, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0010502575896680355, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.368160247802734, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0011445165146142244, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.67296028137207, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.040754079818725586, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.573169708251953, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.034627217799425125, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.673179626464844, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0016448916867375374, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.870048522949219, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011335162445902824, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.405338287353516, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01534795481711626, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.71405029296875, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.0577065572142601, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.867740631103516, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.038673605769872665, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.9945075511932373, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008968210779130459, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19188560545444489, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008577621192671359, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.56913948059082, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0014671996468678117, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.313526153564453, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.039050377905368805, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.04085922241211, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.034720342606306076, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.145397186279297, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0013484241208061576, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.225082397460938, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005920679308474064, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.623130798339844, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008630525320768356, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.145188331604004, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.05139915272593498, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.44591236114502, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03930145129561424, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8156096935272217, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005254392512142658, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16032586991786957, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006090838578529656, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.29776382446289, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.006601288449019194, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.909460067749023, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08456316590309143, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.648834228515625, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07769382745027542, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.066225051879883, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011373800225555897, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.37394905090332, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.09747425466775894, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.559517860412598, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.15151214599609375, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.225625991821289, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10417687147855759, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.77004623413086, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.026985928416252136, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.659838914871216, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.014291839674115181, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1511983424425125, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002675868570804596, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.301050186157227, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000995783251710236, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.3411865234375, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.030422614887356758, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.08625030517578, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.025149697437882423, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.531007766723633, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011410866864025593, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.101544380187988, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0083090178668499, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.4524507522583, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.013512028381228447, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.877148628234863, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03380439057946205, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.822219848632812, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.018686331808567047, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1214780807495117, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.012932728976011276, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.19998577237129211, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001653036568313837, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.210494995117188, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013835449935868382, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.790372848510742, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.030403560027480125, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.16672706604004, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.027733977884054184, "pnorm/_forward_module.model.norm.weight": 30.403493881225586, "gnorm/_forward_module.model.norm.weight": 0.0023765568621456623, "pnorm/_forward_module.lm_head.weight": 226.83792114257812, "gnorm/_forward_module.lm_head.weight": 0.04687945917248726} +{"step": 1300234240, "pnorm/_forward_module.model.embeddings.weight": 141.07455444335938, "gnorm/_forward_module.model.embeddings.weight": 0.05894826352596283, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.800634384155273, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00198227446526289, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.247485160827637, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006856567692011595, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.95867919921875, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007879210636019707, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.88671875, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08114643394947052, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.863147735595703, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07795476168394089, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.07735276222229, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005295613780617714, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3281277120113373, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0008252895786426961, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.019065856933594, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001253687427379191, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.01288604736328, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04418200999498367, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.13018035888672, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.054651472717523575, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.26341438293457, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0016345864860340953, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.155627250671387, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007111826911568642, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.965503692626953, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009722121059894562, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.275693893432617, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.056286267936229706, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.360358238220215, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04981424659490585, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3736448287963867, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007802166976034641, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16184251010417938, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00043400374124757946, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.370744705200195, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009415352833457291, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.680212020874023, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.037703968584537506, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.578351974487305, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03326768800616264, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.673994064331055, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0017113416688516736, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.880741119384766, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.012100561521947384, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.41299819946289, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018058916553854942, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.710845947265625, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05177498981356621, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.864806175231934, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03587711229920387, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.997166872024536, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006480247713625431, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1921638697385788, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000574760022573173, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.570526123046875, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0011055425275117159, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.319629669189453, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.037419483065605164, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.046588897705078, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03221701830625534, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.1511287689209, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0012934647966176271, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.248129844665527, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005547110922634602, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.64323902130127, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.00805575866252184, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.145723342895508, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.04491201043128967, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.447602272033691, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03552280366420746, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.81978702545166, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004381851758807898, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1606668084859848, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005172535311430693, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.293926239013672, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004104199819266796, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.904159545898438, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05899817496538162, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.64664077758789, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06303907185792923, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.071884155273438, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.008204654790461063, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.391129493713379, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06568442285060883, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.573802947998047, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.09967351704835892, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.232171058654785, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.1162981167435646, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.785712242126465, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02688676118850708, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.661700963973999, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007540133316069841, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15133067965507507, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0011435893829911947, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.30048370361328, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0008265849319286644, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.345314025878906, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.028929945081472397, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.091135025024414, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02398681826889515, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.544897079467773, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011622385354712605, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.118243217468262, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006702927406877279, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.464751243591309, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.010317567735910416, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.8942289352417, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.032687485218048096, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.85085391998291, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.018694717437028885, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1247189044952393, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005984886083751917, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20030008256435394, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006864492315798998, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.216081619262695, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012233592569828033, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.806594848632812, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.028138775378465652, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.178909301757812, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.0258586835116148, "pnorm/_forward_module.model.norm.weight": 30.466035842895508, "gnorm/_forward_module.model.norm.weight": 0.003679960733279586, "pnorm/_forward_module.lm_head.weight": 227.16094970703125, "gnorm/_forward_module.lm_head.weight": 0.040333256125450134} +{"step": 1321205760, "pnorm/_forward_module.model.embeddings.weight": 141.12271118164062, "gnorm/_forward_module.model.embeddings.weight": 0.05914343148469925, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.797168731689453, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0023244298063218594, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.265472412109375, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006826381664723158, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.974549293518066, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007800353690981865, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.878178596496582, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08566804230213165, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.85483169555664, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07893665879964828, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.078002691268921, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005218514706939459, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3294304609298706, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0006667140987701714, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.014869689941406, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0013641832629218698, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.0108642578125, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.04499753937125206, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.128765106201172, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.053515877574682236, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.260221481323242, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0016426958609372377, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.16357421875, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0065560415387153625, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.971574783325195, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009375795722007751, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.269356727600098, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05830622836947441, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.354089736938477, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04973026365041733, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.375105381011963, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007541948929429054, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16208921372890472, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008057717350311577, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.371726989746094, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001013649394735694, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.685026168823242, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03810235112905502, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.58220100402832, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03246206417679787, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67453384399414, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001769588328897953, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.889781951904297, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.013959341682493687, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.41940689086914, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.022305944934487343, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.70811653137207, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05109071359038353, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.862336158752441, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.035724300891160965, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 2.998246908187866, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0077556646429002285, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19210615754127502, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009923495817929506, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.572105407714844, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0010665490990504622, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.325820922851562, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03702347353100777, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.05237579345703, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03208387643098831, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.15675926208496, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001289904466830194, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.269139289855957, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005564711056649685, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.661782264709473, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008329257369041443, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.146617889404297, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0448724627494812, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.44967269897461, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03578780218958855, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.823983907699585, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005338352173566818, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16095513105392456, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005585936596617103, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.290515899658203, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.00459871394559741, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.899364471435547, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0640660971403122, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.644987106323242, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.06613844633102417, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.07720947265625, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.008581481873989105, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.405803680419922, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.07227980345487595, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.586116790771484, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.10675112158060074, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.238470077514648, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.11024279147386551, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.800271034240723, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.025619395077228546, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.663219928741455, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007889938540756702, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1514129638671875, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0013428392121568322, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.300697326660156, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0008640324813313782, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.350128173828125, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02822836861014366, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.09630012512207, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.023723114281892776, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.5585994720459, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0010130447335541248, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.13365364074707, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.007073594257235527, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.476156234741211, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011169329285621643, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.911296844482422, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.032297227531671524, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.87887191772461, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.018121885135769844, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.129603862762451, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.009648087434470654, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20070742070674896, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009357236558571458, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.220951080322266, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012635764433071017, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.821165084838867, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.027674376964569092, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.190048217773438, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02538728527724743, "pnorm/_forward_module.model.norm.weight": 30.525667190551758, "gnorm/_forward_module.model.norm.weight": 0.0026498560328036547, "pnorm/_forward_module.lm_head.weight": 227.46197509765625, "gnorm/_forward_module.lm_head.weight": 0.039499469101428986} +{"step": 1342177280, "pnorm/_forward_module.model.embeddings.weight": 141.164794921875, "gnorm/_forward_module.model.embeddings.weight": 0.058640748262405396, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.79366111755371, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00194554531481117, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.283363342285156, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007533328607678413, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 14.990673065185547, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.009363166987895966, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.869575500488281, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08201615512371063, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.846343994140625, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07856862246990204, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0785317420959473, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006088461261242628, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3304711878299713, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0010135268094018102, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.011581420898438, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0013272017240524292, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.009660720825195, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.044763240963220596, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.12751007080078, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0507451593875885, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.257417678833008, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0016690606717020273, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.171113014221191, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.007172630168497562, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.977062225341797, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.009726963937282562, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.263723373413086, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06069335713982582, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.348464965820312, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04956686869263649, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3761329650878906, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009081731550395489, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16226404905319214, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007989450241439044, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.373754501342773, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009934010449796915, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.69076156616211, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03863942250609398, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.586423873901367, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.03142193332314491, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.675485610961914, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014227313222363591, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.89907169342041, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009142551571130753, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.426260948181152, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013192582875490189, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.705615043640137, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05139648914337158, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.859855651855469, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03499780595302582, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.000357151031494, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007301134057343006, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19234442710876465, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0009527565562166274, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.573421478271484, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0010775267146527767, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.33114242553711, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03591737523674965, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.057395935058594, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03067995235323906, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.162181854248047, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0012994155986234546, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.290377616882324, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005706945434212685, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.680243492126465, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009243072010576725, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.146925926208496, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.04662926122546196, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.451044082641602, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03414077311754227, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.826756715774536, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0053755370900034904, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.161146879196167, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.000679567048791796, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.287187576293945, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.003955528140068054, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.893970489501953, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05527150630950928, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.642980575561523, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.05838468670845032, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.082050323486328, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007239924743771553, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.420088768005371, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.05999205633997917, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.597932815551758, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.09255199879407883, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.244279861450195, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10181547701358795, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.813851356506348, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02616438828408718, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.664363145828247, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008550380356609821, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1515038162469864, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014515647199004889, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.299850463867188, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009085623314604163, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.353164672851562, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.028072480112314224, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.100099563598633, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.024137774482369423, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.57217788696289, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011201450834050775, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.148866653442383, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006679420359432697, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.487471580505371, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011233535595238209, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.928018569946289, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.033705465495586395, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.905919075012207, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.018850551918148994, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1334283351898193, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007182389497756958, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20107322931289673, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0008393264724873006, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.225757598876953, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0014190205838531256, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.835142135620117, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02948789857327938, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.200576782226562, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02816929668188095, "pnorm/_forward_module.model.norm.weight": 30.582427978515625, "gnorm/_forward_module.model.norm.weight": 0.003229900961741805, "pnorm/_forward_module.lm_head.weight": 227.7449951171875, "gnorm/_forward_module.lm_head.weight": 0.04910266399383545} +{"step": 1363148800, "pnorm/_forward_module.model.embeddings.weight": 141.20095825195312, "gnorm/_forward_module.model.embeddings.weight": 0.05374623090028763, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.790552139282227, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001824843231588602, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.299911499023438, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007191051729023457, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.005489349365234, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008387953974306583, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.861577987670898, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0700729712843895, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.838410377502441, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06799611449241638, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0796329975128174, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005609361920505762, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.33148473501205444, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0004245341697242111, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.008216857910156, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0009347792947664857, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.00802993774414, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.038205213844776154, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.12608528137207, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.04079481214284897, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.25428009033203, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0011307531967759132, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.177942276000977, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006765102501958609, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.982071876525879, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.008715745992958546, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.257672309875488, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.045187290757894516, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.342463493347168, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04124993085861206, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.377960205078125, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007256019860506058, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16249772906303406, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00028430490056052804, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.37534523010254, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008669274393469095, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.695573806762695, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03294141963124275, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.58985137939453, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.026452306658029556, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67592430114746, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.000976547715254128, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.906492233276367, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006685012020170689, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.431175231933594, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.009249130263924599, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.702682495117188, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03978969529271126, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.857046127319336, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03087293915450573, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0009212493896484, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004819781985133886, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19240565598011017, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00034661972313188016, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.57468605041504, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0008481626282446086, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.33583641052246, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03113553673028946, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.061687469482422, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02588525414466858, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.167390823364258, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0009951089741662145, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.308845520019531, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00497035589069128, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.696150779724121, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.007168017793446779, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.147887229919434, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.035867076367139816, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.452775001525879, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02932261861860752, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8297975063323975, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003714391030371189, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1614442765712738, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002801416558213532, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.283971786499023, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014904882991686463, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.888288497924805, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03503669053316116, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.640836715698242, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04212933033704758, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.086978912353516, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0037696531508117914, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.433381080627441, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02505960687994957, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.609001159667969, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03767385333776474, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.2499418258667, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.08521991968154907, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.826889038085938, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.023117993026971817, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.665050983428955, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.005273185204714537, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15155072510242462, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0005444155540317297, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.30011749267578, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000710589752998203, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35724639892578, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.025506025180220604, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.104522705078125, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02224293164908886, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.585180282592773, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0009651709697209299, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.163945198059082, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006553421262651682, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.498613357543945, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011453812010586262, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.943700790405273, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.028452729806303978, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.931145668029785, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01771644316613674, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.137404680252075, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.006563759874552488, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20141372084617615, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0007686461322009563, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.229713439941406, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0013562324456870556, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.847579956054688, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.028703317046165466, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.209529876708984, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.027434498071670532, "pnorm/_forward_module.model.norm.weight": 30.637121200561523, "gnorm/_forward_module.model.norm.weight": 0.003315381007269025, "pnorm/_forward_module.lm_head.weight": 228.0102081298828, "gnorm/_forward_module.lm_head.weight": 0.04485529288649559} +{"step": 1384120320, "pnorm/_forward_module.model.embeddings.weight": 141.232177734375, "gnorm/_forward_module.model.embeddings.weight": 0.058650482445955276, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.787113189697266, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.002018216298893094, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.315279960632324, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.007539136800915003, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.019359588623047, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008825286291539669, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.853346824645996, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08020761609077454, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.83032512664795, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07771926373243332, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.080162286758423, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.006202638614922762, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.332518070936203, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0009384734439663589, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.00519561767578, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0014836620539426804, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.006547927856445, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.044472403824329376, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.12481117248535, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05741870403289795, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.251686096191406, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0015970258973538876, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.185342788696289, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006840632762759924, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.987771987915039, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.008424407802522182, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.252098083496094, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.05816105753183365, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.336997032165527, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05021923780441284, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.378664255142212, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.008146763779222965, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1626349687576294, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007612494518980384, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.376850128173828, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009943468030542135, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.6998348236084, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03818826377391815, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.592926025390625, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.035417478531599045, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67708396911621, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0019145694095641375, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.916311264038086, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.015116201713681221, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.437966346740723, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.022272586822509766, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.699816703796387, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05453027784824371, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.854414939880371, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03627659007906914, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0006039142608643, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0061686295084655285, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19235721230506897, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007327236817218363, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.57683563232422, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0013861807528883219, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.34128189086914, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.038125406950712204, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.066442489624023, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03503452613949776, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.17184066772461, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001256531453691423, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.326518058776855, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006047028116881847, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.711785316467285, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008260744623839855, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14812183380127, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.049443647265434265, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.453910827636719, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03805055841803551, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8316752910614014, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005349011160433292, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16145026683807373, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006320069078356028, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.280517578125, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.007043389603495598, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.881977081298828, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.08885577321052551, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.638086318969727, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.08054155111312866, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.09215545654297, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.011790873482823372, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.44782543182373, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.10693379491567612, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.620986938476562, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.16168992221355438, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.255590438842773, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.10495386272668839, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.839397430419922, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.025939002633094788, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6661930084228516, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.011451495811343193, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.151579350233078, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.002061538863927126, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.3001766204834, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0010234909132122993, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.360416412353516, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.029115770012140274, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.10812759399414, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.024506045505404472, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.59740447998047, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001214071293361485, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.176595687866211, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00908095482736826, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.50783920288086, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.015941698104143143, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.958739280700684, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03184812515974045, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.955282211303711, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.017262596637010574, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.140420436859131, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.015003564767539501, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20164674520492554, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001756508951075375, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.23409080505371, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.001128224190324545, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.85969352722168, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.026378802955150604, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.218425750732422, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02535208687186241, "pnorm/_forward_module.model.norm.weight": 30.689237594604492, "gnorm/_forward_module.model.norm.weight": 0.002233593724668026, "pnorm/_forward_module.lm_head.weight": 228.25726318359375, "gnorm/_forward_module.lm_head.weight": 0.03413098677992821} +{"step": 1405091840, "pnorm/_forward_module.model.embeddings.weight": 141.25833129882812, "gnorm/_forward_module.model.embeddings.weight": 0.05415948107838631, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.78402328491211, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0018000563140958548, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.329947471618652, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006924462504684925, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.032781600952148, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007897059433162212, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.84557056427002, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.07260427623987198, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.822630882263184, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.07157719880342484, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0806212425231934, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004937555640935898, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3333693742752075, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00046684255357831717, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.002613067626953, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0011545694433152676, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.00518035888672, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03855122625827789, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.123626708984375, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.04388918727636337, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.248605728149414, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0012675904436036944, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.190898895263672, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006737087853252888, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.991753578186035, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0087998416274786, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.246384620666504, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.047184813767671585, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.331475257873535, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.044190097600221634, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.379140615463257, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007752858567982912, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16276521980762482, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008044333080761135, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.378419876098633, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0009112003026530147, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.70374870300293, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.0351591520011425, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.595726013183594, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.028599543496966362, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67791175842285, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014896116917952895, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.924257278442383, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010679430328309536, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.443900108337402, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.017026178538799286, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.697083473205566, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.04351349547505379, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.851632118225098, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03164813295006752, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.001248836517334, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008405469357967377, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19224528968334198, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007650203187949955, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.5781307220459, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0010329480282962322, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.34536361694336, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0332251638174057, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.070262908935547, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.027882663533091545, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.176557540893555, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001140198903158307, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.34247875213623, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0056751202791929245, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.725225448608398, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.008057800121605396, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.148874282836914, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03741441294550896, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.455514907836914, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.031217413023114204, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.834176778793335, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004951159469783306, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16167257726192474, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005358237540349364, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.277551651000977, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0034964419901371002, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.876548767089844, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.05123686045408249, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.63565444946289, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.05274403840303421, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.096538543701172, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.00644539762288332, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.460235595703125, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.05279520899057388, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.631239891052246, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.07985293865203857, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.260187149047852, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.09103164076805115, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.850116729736328, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.023007025942206383, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.667814254760742, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008620255626738071, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15166711807250977, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014039167435839772, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.300125122070312, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0007771641830913723, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.363014221191406, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02567717432975769, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.111433029174805, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02178085222840309, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.60945701599121, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001035600434988737, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.190305709838867, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006778387818485498, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.518033981323242, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011051727458834648, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.973413467407227, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.028227834030985832, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 12.978568077087402, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.016688702628016472, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.142080307006836, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007087558973580599, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2017400711774826, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00045114484964869916, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.23851203918457, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012396343518048525, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.871734619140625, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02682872675359249, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.226953506469727, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02556586265563965, "pnorm/_forward_module.model.norm.weight": 30.73866081237793, "gnorm/_forward_module.model.norm.weight": 0.0034814453683793545, "pnorm/_forward_module.lm_head.weight": 228.48666381835938, "gnorm/_forward_module.lm_head.weight": 0.0415467843413353} +{"step": 1426063360, "pnorm/_forward_module.model.embeddings.weight": 141.28012084960938, "gnorm/_forward_module.model.embeddings.weight": 0.06143143028020859, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.780841827392578, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.00229337764903903, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.343125343322754, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006920484360307455, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.044458389282227, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.00822452176362276, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.838013648986816, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.08910354226827621, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.815057754516602, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.08457683026790619, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0814008712768555, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005676338914781809, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3342207372188568, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0007715040119364858, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 22.00058364868164, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0014902764232829213, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.00417709350586, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0479637086391449, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.122636795043945, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.05628800764679909, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.24551773071289, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0018297253409400582, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.195685386657715, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00785733386874199, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.994994163513184, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.010560862720012665, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.240538597106934, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.06468556076288223, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.32577896118164, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.05379108339548111, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.380969762802124, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.009467413648962975, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16301283240318298, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0009153638384304941, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.38018226623535, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.001118742162361741, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.707740783691406, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.042473506182432175, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.59857177734375, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.035390038043260574, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.67915916442871, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001876703230664134, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.932242393493652, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.014147643931210041, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.449552536010742, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.020041435956954956, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.694643020629883, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.05835969001054764, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.849209785461426, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.036664001643657684, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0027120113372803, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008318657986819744, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19226545095443726, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008854373008944094, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.57876968383789, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.001222226652316749, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.348007202148438, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03879677876830101, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.073116302490234, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.03417186439037323, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.180660247802734, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0014247809303924441, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.357261657714844, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.006059303879737854, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.738140106201172, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.009050014428794384, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14871597290039, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.049621615558862686, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456313133239746, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03666876256465912, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.836935520172119, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0067070359364151955, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16199980676174164, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0008755015442147851, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.275026321411133, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.005864987149834633, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.871463775634766, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.07914095371961594, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.633554458618164, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.07375045865774155, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.10091781616211, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.01053055003285408, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.473670959472656, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.09218183159828186, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.642518043518066, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.14585240185260773, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.26554012298584, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.0896771103143692, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.861485481262207, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02703119069337845, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.668905258178711, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.010807356797158718, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15167659521102905, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0018861411372199655, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.299448013305664, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0009873913368210196, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.364133834838867, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02944166585803032, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.114059448242188, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02424027770757675, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.621122360229492, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.001310874824412167, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.202688217163086, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008222578093409538, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.5269775390625, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01423039473593235, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 11.9876070022583, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.03516367822885513, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.000852584838867, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.018927790224552155, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.145982265472412, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.006846986711025238, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2021321952342987, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0007431220728904009, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.242679595947266, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0014447948196902871, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.882671356201172, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.030102215707302094, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.234933853149414, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.027023527771234512, "pnorm/_forward_module.model.norm.weight": 30.786340713500977, "gnorm/_forward_module.model.norm.weight": 0.0032523951958864927, "pnorm/_forward_module.lm_head.weight": 228.70083618164062, "gnorm/_forward_module.lm_head.weight": 0.04591897502541542} +{"step": 1447034880, "pnorm/_forward_module.model.embeddings.weight": 141.29800415039062, "gnorm/_forward_module.model.embeddings.weight": 0.05258964002132416, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.777780532836914, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001824040780775249, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.356145858764648, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006446984130889177, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.056172370910645, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007319051772356033, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.830506324768066, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.07009615004062653, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.807587623596191, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06967712938785553, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0821123123168945, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0047168671153485775, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3351103663444519, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00026414936291985214, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.998125076293945, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.001134722027927637, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.002609252929688, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03770115599036217, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.12126350402832, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03995811939239502, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.24334716796875, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0011333344737067819, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.20146656036377, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006455204915255308, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 11.999241828918457, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.00849646795541048, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.235515594482422, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.046448204666376114, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.320708274841309, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.043121203780174255, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.382667064666748, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00776921771466732, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1631447970867157, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0007328201318159699, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.38278579711914, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008522021817043424, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.712427139282227, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03434569388628006, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.601621627807617, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.026990268379449844, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.68056297302246, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0011590613285079598, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.939781188964844, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008263804949820042, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.45512866973877, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.011462535709142685, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.692529678344727, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.04051613062620163, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.847216606140137, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03148635849356651, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0049619674682617, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.008175786584615707, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19244834780693054, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0007070201099850237, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.57982063293457, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009730973397381604, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.350919723510742, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.033133480697870255, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.076122283935547, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02674063853919506, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.184579849243164, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0010417834855616093, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.371920585632324, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005275707691907883, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.751214981079102, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.007814446464180946, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.148221015930176, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03519723564386368, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456670761108398, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.029805844649672508, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8399524688720703, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005025614984333515, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16223132610321045, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005328648840077221, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.27263832092285, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0015895599499344826, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.866844177246094, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03653561696410179, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.63156509399414, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04529011249542236, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.105085372924805, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003769832430407405, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.485916137695312, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.025978367775678635, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.652729988098145, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03325425460934639, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.270330429077148, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.08899592608213425, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.872037887573242, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.024067522957921028, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.669525623321533, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.005143071990460157, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15172874927520752, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0005798544734716415, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29937744140625, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000771659251768142, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.366060256958008, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.026611287146806717, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.116830825805664, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.022945253178477287, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.63125991821289, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0010296710534021258, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.213436126708984, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.009367029182612896, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.534589767456055, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01571926847100258, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.00025463104248, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.029697958379983902, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.021187782287598, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01723971962928772, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1484856605529785, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013487432152032852, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20230819284915924, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0015228039119392633, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.246625900268555, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.001230333000421524, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.892702102661133, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.028739765286445618, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.242645263671875, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.02588404156267643, "pnorm/_forward_module.model.norm.weight": 30.831363677978516, "gnorm/_forward_module.model.norm.weight": 0.003400243818759918, "pnorm/_forward_module.lm_head.weight": 228.90109252929688, "gnorm/_forward_module.lm_head.weight": 0.047863125801086426} +{"step": 1468006400, "pnorm/_forward_module.model.embeddings.weight": 141.31236267089844, "gnorm/_forward_module.model.embeddings.weight": 0.05013102665543556, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.775081634521484, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0015525136841461062, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.368962287902832, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006622446700930595, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.067627906799316, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007500693667680025, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.82346248626709, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0645046979188919, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.800561904907227, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06562145799398422, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0828769207000732, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005047605838626623, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3362793028354645, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000645408290438354, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.995513916015625, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0010446823434904218, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 27.000473022460938, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03636545315384865, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.119388580322266, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.041658833622932434, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.241077423095703, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0011502369306981564, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.206478118896484, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0057373084127902985, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.003098487854004, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007095671724528074, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.230626106262207, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.04263436049222946, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.315875053405762, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04125181958079338, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.384270668029785, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007089054677635431, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16333702206611633, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00047861470375210047, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.384702682495117, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007941853837110102, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.715898513793945, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03216007724404335, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.604129791259766, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.027870191261172295, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.682024002075195, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014050680911168456, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.94752025604248, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011101880110800266, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.460402488708496, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016452597454190254, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.690164566040039, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.039375998079776764, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.845064163208008, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.03062829002737999, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0067365169525146, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005773080978542566, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19243018329143524, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000580997730139643, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.58149528503418, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0010479808552190661, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.354467391967773, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03235367685556412, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.07958984375, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.027739843353629112, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.18927764892578, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0009558442980051041, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.38760757446289, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004932490177452564, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.764946937561035, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006948210299015045, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.148770332336426, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03728168085217476, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457958221435547, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03126651421189308, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.841994524002075, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005382952746003866, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16223062574863434, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006781523115932941, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.269845962524414, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004907122813165188, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.861183166503906, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.06488759815692902, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.62860107421875, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.05747140571475029, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.109296798706055, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.008249299600720406, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.496137619018555, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.07453179359436035, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.661230087280273, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.11037327349185944, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.274636268615723, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07080935686826706, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.882203102111816, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.021933073177933693, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.670177936553955, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008650572970509529, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1517312228679657, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0014350098790600896, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.298566818237305, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0008075315272435546, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.36655616760254, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02492307871580124, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.118667602539062, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.020654192194342613, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.641489028930664, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0009462415473535657, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.223227500915527, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006026186514645815, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.541764259338379, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.010831179097294807, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.012748718261719, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.02564798854291439, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.040681838989258, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01516793854534626, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1528186798095703, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0052392370998859406, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2027999460697174, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006737664807587862, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.249488830566406, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0009689174476079643, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.90048599243164, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.023535719141364098, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.248680114746094, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.021787654608488083, "pnorm/_forward_module.model.norm.weight": 30.873689651489258, "gnorm/_forward_module.model.norm.weight": 0.0035547856241464615, "pnorm/_forward_module.lm_head.weight": 229.08653259277344, "gnorm/_forward_module.lm_head.weight": 0.03724076971411705} +{"step": 1488977920, "pnorm/_forward_module.model.embeddings.weight": 141.3234100341797, "gnorm/_forward_module.model.embeddings.weight": 0.05253734812140465, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.772113800048828, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0016981024527922273, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.380290985107422, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006542083341628313, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.077759742736816, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007208861876279116, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.816399574279785, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.06592147797346115, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.79342269897461, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06700903922319412, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0833911895751953, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004816305357962847, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.33688825368881226, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0004066765250172466, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.99413299560547, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0009714727057144046, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.999711990356445, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03584813326597214, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.118425369262695, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03649679571390152, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.2392635345459, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0011028603184968233, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.212410926818848, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00666717579588294, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.007695198059082, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.008508727885782719, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.226027488708496, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.044157445430755615, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.311334609985352, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.042394042015075684, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.385383367538452, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007576615549623966, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1635119915008545, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0006312267505563796, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.386943817138672, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008775184978730977, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.719499588012695, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.033379681408405304, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.606647491455078, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.025792749598622322, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.68305778503418, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0011990396305918694, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.953951835632324, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009497753344476223, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.465391159057617, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.013468507677316666, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.687762260437012, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03991460055112839, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.842670440673828, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.030735958367586136, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0088906288146973, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00623240415006876, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19254277646541595, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005940856062807143, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.583051681518555, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009145922376774251, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.3575496673584, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03182484582066536, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.08248519897461, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.025120846927165985, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.193265914916992, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0009410099009983242, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.400923728942871, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0052547939121723175, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.776908874511719, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.007596385665237904, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14890193939209, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03446129336953163, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.458575248718262, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.029325764626264572, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.844266176223755, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004569970536977053, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16237851977348328, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005658991285599768, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.267431259155273, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0027748497668653727, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.856149673461914, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.044703081250190735, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.626022338867188, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.041939131915569305, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.112924575805664, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.004985740873962641, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.505659103393555, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.04140370711684227, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.669149398803711, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.06537748128175735, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.278430938720703, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07044295221567154, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.890669822692871, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.022273501381278038, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.671079397201538, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.007832365110516548, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15178120136260986, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0011537026148289442, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.299421310424805, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0007265189778991044, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.368831634521484, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.025830945000052452, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.121479034423828, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.022416841238737106, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.651479721069336, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0009857782861217856, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.233137130737305, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008845590986311436, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.548938751220703, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01551905833184719, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.024788856506348, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.026185384020209312, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.059568405151367, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.016703125089406967, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1553757190704346, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.012951829470694065, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2030186951160431, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0016150136943906546, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.252771377563477, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0012832039501518011, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.908504486083984, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.026429537683725357, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.254650115966797, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.024065641686320305, "pnorm/_forward_module.model.norm.weight": 30.913516998291016, "gnorm/_forward_module.model.norm.weight": 0.0032282168976962566, "pnorm/_forward_module.lm_head.weight": 229.25845336914062, "gnorm/_forward_module.lm_head.weight": 0.036953262984752655} +{"step": 1509949440, "pnorm/_forward_module.model.embeddings.weight": 141.33163452148438, "gnorm/_forward_module.model.embeddings.weight": 0.05193153768777847, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.769203186035156, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0016872099367901683, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.390849113464355, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006966608576476574, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.087395668029785, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.008413177914917469, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.809537887573242, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.06804339587688446, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.786602020263672, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06783075630664825, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0843405723571777, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005484515335410833, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.33786919713020325, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00037869837251491845, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.993038177490234, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0009445322211831808, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.998952865600586, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03703344613313675, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.11757469177246, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03965944051742554, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.237106323242188, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001119338790886104, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.216598510742188, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006491054780781269, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.010780334472656, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.008938536047935486, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.221681594848633, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.04585679993033409, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.306864738464355, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.04208604618906975, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3857004642486572, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007643653079867363, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1636420488357544, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005517303943634033, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.388408660888672, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008207714417949319, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.721933364868164, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.033649034798145294, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.608234405517578, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02691021002829075, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.683944702148438, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.001429461408406496, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.959382057189941, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010423910804092884, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.469282150268555, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.016386820003390312, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.685415267944336, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.041635576635599136, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.840385437011719, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.030728967860341072, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0099310874938965, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.00526405917480588, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1925516575574875, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0003077512083109468, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.58418846130371, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009217620827257633, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.359636306762695, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.03200826793909073, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.08485984802246, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0252953190356493, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.196287155151367, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.001020570402033627, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.41177749633789, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.005945454817265272, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.786412239074707, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.00785202719271183, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.148480415344238, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.0345115065574646, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.458809852600098, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.029642432928085327, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.846200466156006, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.005159120075404644, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16255632042884827, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0004712569061666727, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.26522445678711, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0023406515829265118, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.851303100585938, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.04127810522913933, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.623703002929688, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.04175635054707527, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.116493225097656, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0048225694335997105, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.515142440795898, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.0391351543366909, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.677168846130371, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.054032985121011734, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.282176971435547, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07753630727529526, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.899160385131836, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.022321123629808426, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6711299419403076, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006508544087409973, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15173636376857758, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0008781488286331296, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29921531677246, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000713752000592649, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.369333267211914, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.025298111140727997, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.1231632232666, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.02126453071832657, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.66115379333496, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011064456775784492, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.242242813110352, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0065179080702364445, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.555590629577637, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01093506533652544, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.036498069763184, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.025752823799848557, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.077733039855957, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.015995962545275688, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1576952934265137, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007540058810263872, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20317935943603516, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0008925902075134218, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.255863189697266, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0011199692962691188, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.915464401245117, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.026959823444485664, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.260236740112305, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.025378167629241943, "pnorm/_forward_module.model.norm.weight": 30.951486587524414, "gnorm/_forward_module.model.norm.weight": 0.002356578130275011, "pnorm/_forward_module.lm_head.weight": 229.41815185546875, "gnorm/_forward_module.lm_head.weight": 0.035638391971588135} +{"step": 1530920960, "pnorm/_forward_module.model.embeddings.weight": 141.3372802734375, "gnorm/_forward_module.model.embeddings.weight": 0.04841391742229462, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.766447067260742, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0015646313549950719, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.400789260864258, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.0063665020279586315, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.096345901489258, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007209485862404108, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.803022384643555, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.06304460018873215, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.780112266540527, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06326435506343842, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0856075286865234, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004498713184148073, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.33889591693878174, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0002727353130467236, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.991863250732422, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0009120333124883473, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.998043060302734, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03502418473362923, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.116546630859375, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03734087198972702, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.235403060913086, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0009609755361452699, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.221500396728516, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005639285314828157, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.014350891113281, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006754318252205849, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.217493057250977, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.04148973524570465, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.302679061889648, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03995806723833084, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.385917901992798, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005392624996602535, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1636238396167755, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00035987759474664927, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.390199661254883, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008135305834002793, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.724428176879883, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.031364914029836655, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.609888076782227, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.025339864194393158, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.685047149658203, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0012392301578074694, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.965089797973633, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010961826890707016, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.473388671875, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.015894509851932526, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.683260917663574, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.036937471479177475, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.838356018066406, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.029114803299307823, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0105559825897217, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006906350143253803, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1924651861190796, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0006614853045903146, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.58517074584961, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007744840113446116, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.361331939697266, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.029749510809779167, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.08696937561035, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.023876355960965157, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.19970703125, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0009365943260490894, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.423176765441895, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004792120773345232, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.79623794555664, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006901762448251247, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.148130416870117, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03285914659500122, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459095001220703, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02761179581284523, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8482306003570557, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0036545570474117994, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16264209151268005, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002525387972127646, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.263275146484375, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001378426793962717, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.847379684448242, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.033951278775930405, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.622053146362305, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03731426224112511, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.120166778564453, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0030745964031666517, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.524160385131836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02597479149699211, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.684815406799316, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03409360349178314, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.285798072814941, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07370274513959885, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.906930923461914, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.021297218278050423, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6716413497924805, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0041878498159348965, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1517602801322937, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00024401528935413808, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.298364639282227, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006895376718603075, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.368860244750977, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02365107089281082, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.1243896484375, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.020164739340543747, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.670429229736328, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007985431584529579, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.25068187713623, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.005592254921793938, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.561952590942383, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.008755877614021301, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.047650337219238, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.02348273992538452, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.09496021270752, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.015185757540166378, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1592087745666504, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.003626700025051832, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20327462255954742, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00025187243591062725, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.258527755737305, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0011376891052350402, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.921674728393555, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.023752881214022636, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.26498031616211, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.023052280768752098, "pnorm/_forward_module.model.norm.weight": 30.98709487915039, "gnorm/_forward_module.model.norm.weight": 0.002909998642280698, "pnorm/_forward_module.lm_head.weight": 229.56576538085938, "gnorm/_forward_module.lm_head.weight": 0.03307252377271652} +{"step": 1551892480, "pnorm/_forward_module.model.embeddings.weight": 141.3407440185547, "gnorm/_forward_module.model.embeddings.weight": 0.04660690203309059, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.763931274414062, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0014899058733135462, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.410350799560547, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.00662748608738184, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.105124473571777, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0076172687113285065, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.79688549041748, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05842110142111778, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.7739839553833, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.059329014271497726, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0864734649658203, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004970878828316927, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3398052752017975, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0002769292623270303, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.991344451904297, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0008916803635656834, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.997770309448242, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.033017098903656006, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.1159610748291, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03345078229904175, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.233366012573242, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0009784854482859373, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.225470542907715, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005888581275939941, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.017339706420898, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0069593144580721855, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.212929725646973, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03836136683821678, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.29824447631836, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03672733157873154, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3865044116973877, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006821371614933014, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16373586654663086, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0008537101675756276, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.39177131652832, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006980585749261081, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.726455688476562, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.029308974742889404, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.611366271972656, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02339565008878708, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.685989379882812, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.000863801222294569, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.969808578491211, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006747142411768436, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.477059364318848, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.00835027452558279, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.681181907653809, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03433101996779442, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.836250305175781, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.02725241892039776, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.012617349624634, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005009262822568417, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19260646402835846, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00040722748963162303, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.586891174316406, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007387946825474501, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.36378288269043, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.0288309957832098, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.089452743530273, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.023050149902701378, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.20259666442871, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0008316697203554213, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.433361053466797, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004896924365311861, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.805166244506836, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006447851657867432, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.147587776184082, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.029295992106199265, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459115028381348, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02663147635757923, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8495113849639893, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003984327428042889, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16269628703594208, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00038963492261245847, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.261194229125977, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001806875690817833, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.843069076538086, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03476063534617424, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.62029457092285, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03578706085681915, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.123754501342773, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003543598810210824, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.532736778259277, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02762742154300213, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.692151069641113, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.036372989416122437, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.28872299194336, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06826578825712204, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.913771629333496, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.020621027797460556, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6727254390716553, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.005346538033336401, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15180858969688416, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0007277269032783806, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29783058166504, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006579891196452081, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.368385314941406, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02362060360610485, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12548828125, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.020003899931907654, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.679641723632812, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0008624054025858641, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.25964641571045, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00583116989582777, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.568631172180176, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.00994083285331726, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.058343887329102, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.02158304490149021, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.111202239990234, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.014513000845909119, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1607871055603027, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.006419248413294554, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20338362455368042, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006835360545665026, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.261619567871094, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008764219819568098, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.9283390045166, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.0221656933426857, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.270231246948242, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.020319724455475807, "pnorm/_forward_module.model.norm.weight": 31.02104377746582, "gnorm/_forward_module.model.norm.weight": 0.0033368293661624193, "pnorm/_forward_module.lm_head.weight": 229.7028045654297, "gnorm/_forward_module.lm_head.weight": 0.030175838619470596} +{"step": 1572864000, "pnorm/_forward_module.model.embeddings.weight": 141.34231567382812, "gnorm/_forward_module.model.embeddings.weight": 0.0511101670563221, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.761091232299805, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0014944735448807478, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.418560028076172, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006235671695321798, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.112528800964355, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007074739318341017, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.79068660736084, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.06437401473522186, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.767828941345215, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.06364325433969498, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.086941957473755, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004765678197145462, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34041154384613037, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0004845497605856508, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.990802764892578, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0008579209097661078, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.997217178344727, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0346577912569046, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.11513900756836, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.039726149290800095, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.231517791748047, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0011176248081028461, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.228399276733398, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005880988202989101, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.019572257995605, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0074874055571854115, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.209059715270996, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.04307766631245613, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.29435920715332, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.040068164467811584, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.387221574783325, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00714969402179122, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16385243833065033, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005019159289076924, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.393770217895508, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008677493315190077, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.728885650634766, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03229229524731636, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.613039016723633, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02695317566394806, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.686843872070312, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0012782858684659004, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.974830627441406, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.010295368731021881, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.480842590332031, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01593465358018875, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.67902946472168, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03943950682878494, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.833976745605469, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.029816314578056335, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0134360790252686, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.007052162662148476, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19259196519851685, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0008759571355767548, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.587890625, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0009144030627794564, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.365083694458008, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.031422242522239685, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.091167449951172, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.026540502905845642, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.205978393554688, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.000955247669480741, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.443727493286133, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004960155580192804, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.814384460449219, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006652272772043943, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.147491455078125, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.036955174058675766, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.4594144821167, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.03044072538614273, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.85109281539917, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0038881185464560986, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16281409561634064, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00039907669997774065, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.259735107421875, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.004425464663654566, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.839826583862305, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.059626128524541855, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.6192684173584, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.05310096591711044, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.12665367126465, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.007235957309603691, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.540616989135742, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.06439444422721863, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.69881820678711, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.09599091857671738, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.29157543182373, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07334250211715698, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.920063972473145, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02341967634856701, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.67322039604187, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.008087008260190487, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15176637470722198, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0013108761049807072, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.297298431396484, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0007821611943654716, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.367847442626953, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.026625119149684906, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.126354217529297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.021927157416939735, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.688087463378906, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0010033717844635248, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.267772674560547, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.007008403539657593, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.574613571166992, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011401042342185974, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.06822681427002, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.027282273396849632, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.126143455505371, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01634199731051922, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.161935567855835, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.00953090749680996, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2034720778465271, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.001052219420671463, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.26384162902832, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0009182909270748496, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.933311462402344, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02549002133309841, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.274389266967773, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.022595375776290894, "pnorm/_forward_module.model.norm.weight": 31.052589416503906, "gnorm/_forward_module.model.norm.weight": 0.003593753557652235, "pnorm/_forward_module.lm_head.weight": 229.82864379882812, "gnorm/_forward_module.lm_head.weight": 0.039079051464796066} +{"step": 1593835520, "pnorm/_forward_module.model.embeddings.weight": 141.34226989746094, "gnorm/_forward_module.model.embeddings.weight": 0.048920322209596634, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.758739471435547, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0015286377165466547, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.42687702178955, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006441260222345591, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.120044708251953, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007151174824684858, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.78491497039795, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.06079521030187607, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.762158393859863, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.061314500868320465, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.087322235107422, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.005053061060607433, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3410835564136505, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00040872677345760167, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989994049072266, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0008550931815989316, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.996349334716797, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03364688530564308, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.114185333251953, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0334794707596302, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.229536056518555, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0009760325192473829, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.231460571289062, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00634295167401433, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.021903991699219, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007837914861738682, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.205062866210938, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.040487680584192276, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.290358543395996, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.038616668432950974, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3877716064453125, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.007297574542462826, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1639394462108612, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005739738699048758, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.395801544189453, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007807416841387749, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.7311954498291, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.030722618103027344, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.614566802978516, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.023675622418522835, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.68805694580078, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0009266522829420865, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.979864120483398, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007016435731202364, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.48468017578125, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.008664139546453953, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.67728042602539, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03528078645467758, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.832291603088379, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.028138725087046623, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.014346122741699, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0049085356295108795, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19257384538650513, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0005124473827891052, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.589078903198242, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007789076771587133, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.366561889648438, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.029513558372855186, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.092700958251953, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.022875957190990448, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.209251403808594, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0008457418298348784, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.453250885009766, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00508505292236805, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.822916030883789, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006801978684961796, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.147290229797363, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.030658286064863205, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459641456604004, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.027094844728708267, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8529772758483887, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00418664887547493, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16282130777835846, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0005261188489384949, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.258058547973633, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014438950456678867, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.835988998413086, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03462684899568558, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.617464065551758, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03335690498352051, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.129613876342773, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.002820787485688925, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.547597885131836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02168707363307476, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.704668998718262, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.02756560780107975, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.294089317321777, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06976042687892914, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.9258394241333, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.022036785259842873, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.673563003540039, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.006335618905723095, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15179000794887543, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0006447040941566229, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.296855926513672, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0007523235399276018, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.367206573486328, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.025579005479812622, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127103805541992, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.023026296868920326, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.695764541625977, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0011645004851743579, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.274066925048828, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.010883286595344543, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.579106330871582, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.02049865387380123, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.077529907226562, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.025907723233103752, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.140000343322754, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.015810057520866394, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.164200782775879, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.020050378516316414, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20375747978687286, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0026022789534181356, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.266286849975586, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0009953962871804833, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.93834686279297, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.025253403931856155, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.278522491455078, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.022843917831778526, "pnorm/_forward_module.model.norm.weight": 31.081966400146484, "gnorm/_forward_module.model.norm.weight": 0.0028552794829010963, "pnorm/_forward_module.lm_head.weight": 229.94252014160156, "gnorm/_forward_module.lm_head.weight": 0.03862690553069115} +{"step": 1614807040, "pnorm/_forward_module.model.embeddings.weight": 141.34100341796875, "gnorm/_forward_module.model.embeddings.weight": 0.04720361530780792, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.75642967224121, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0014735623262822628, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.434249877929688, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005979258567094803, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.126725196838379, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0066452487371861935, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.77946662902832, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.060822900384664536, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.756768226623535, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.0608995258808136, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0875322818756104, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0043783956207334995, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3416237533092499, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0004952540621161461, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989561080932617, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.00099596893414855, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.99574089050293, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.034568481147289276, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.113462448120117, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.038843732327222824, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.22785758972168, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0010164406849071383, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.234116554260254, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00605815602466464, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.023839950561523, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007085306104272604, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.201482772827148, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.04089934006333351, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.28678035736084, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03818514198064804, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.388753890991211, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006844379473477602, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16406431794166565, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00048476678784936666, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.397716522216797, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0008764974190853536, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.733348846435547, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.031428683549165726, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.615951538085938, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02615945227444172, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.688640594482422, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0014776411699131131, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.983821868896484, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011906755156815052, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.487586975097656, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.018466467037796974, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.675104141235352, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03695788234472275, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.830079078674316, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.027732806280255318, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.01434326171875, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0053502474911510944, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19251984357833862, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0004936269833706319, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.5899658203125, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0008425931446254253, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.367460250854492, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.029939115047454834, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.09379768371582, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.024154895916581154, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.211984634399414, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0008909463649615645, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.460700035095215, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00480251619592309, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.829201698303223, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006445455830544233, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.147040367126465, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.03174208477139473, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459756851196289, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02700420841574669, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8549816608428955, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004829673562198877, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16297473013401031, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0006523000774905086, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.25634002685547, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002242951886728406, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.83220672607422, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03873131424188614, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.615859985351562, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.041986625641584396, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.132781982421875, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0043141795322299, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.554150581359863, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.03924781084060669, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.710217475891113, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.05586624518036842, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.29671573638916, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.07275590300559998, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.931479454040527, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.02056742087006569, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.674154281616211, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00469741877168417, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15176358819007874, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0004519731446634978, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.2960147857666, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006718530785292387, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.365936279296875, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.023274937644600868, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127418518066406, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.01936795935034752, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.70330238342285, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.000806099153123796, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.28111457824707, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004994237329810858, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.584203720092773, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.00769190676510334, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.08635139465332, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.02246537059545517, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.15308666229248, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.014324829913675785, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.165949583053589, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.004256530199199915, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2038712352514267, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0003398848930373788, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.2685489654541, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.00081594631774351, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.942760467529297, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.021546516567468643, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.282100677490234, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01945408619940281, "pnorm/_forward_module.model.norm.weight": 31.109350204467773, "gnorm/_forward_module.model.norm.weight": 0.002632853575050831, "pnorm/_forward_module.lm_head.weight": 230.0476531982422, "gnorm/_forward_module.lm_head.weight": 0.031549569219350815} +{"step": 1635778560, "pnorm/_forward_module.model.embeddings.weight": 141.33865356445312, "gnorm/_forward_module.model.embeddings.weight": 0.04757488891482353, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.754228591918945, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001384550821967423, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.441152572631836, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006228774320334196, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.133137702941895, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006767550017684698, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.774319648742676, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.057370979338884354, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.751606941223145, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05874723196029663, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08780837059021, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004475228022783995, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3422457277774811, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00033900741254910827, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988998413085938, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0008607521303929389, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.994863510131836, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03271064534783363, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.112613677978516, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0334082767367363, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.22633934020996, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.001017098780721426, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.237645149230957, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.006288683973252773, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.026549339294434, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007115233689546585, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.197785377502441, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.038260962814092636, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.283203125, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.036407604813575745, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3888845443725586, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005560883786529303, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16406819224357605, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005824992549605668, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.399370193481445, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007806739886291325, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.735008239746094, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.03008354641497135, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.61709976196289, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.023673608899116516, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.68962860107422, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00101920694578439, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.988675117492676, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.009005491621792316, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.491043090820312, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01150592789053917, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.67341423034668, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.033890292048454285, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.828376770019531, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.02702590823173523, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0140671730041504, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004927969072014093, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19240893423557281, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00024775939527899027, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59140396118164, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.000777918437961489, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.36893653869629, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02873246744275093, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.095317840576172, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02303978241980076, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.214540481567383, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0008132493239827454, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.468366622924805, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004533540923148394, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.836150169372559, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.00603446876630187, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.146756172180176, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02806384116411209, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459814071655273, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02595445141196251, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8557450771331787, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0035003244411200285, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16296668350696564, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002932557254098356, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.254663467407227, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.00223099859431386, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.828426361083984, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.038576580584049225, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.614212036132812, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.0375615730881691, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.135725021362305, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003881220007315278, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.56001091003418, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.03459172323346138, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.715176582336426, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04954523220658302, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.298830032348633, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06558841466903687, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.936346054077148, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.019765015691518784, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.674858570098877, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0044510760344564915, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15175460278987885, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002696436131373048, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.296005249023438, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000667211483232677, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.365476608276367, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.023003777489066124, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128128051757812, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.020570436492562294, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.710079193115234, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0008972841314971447, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.287060737609863, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.009145347401499748, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.588678359985352, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.016169724985957146, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.094293594360352, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.021083880215883255, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.164840698242188, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.014314854517579079, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1673355102539062, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.016458092257380486, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20400190353393555, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0020926084835082293, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.270824432373047, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008035209029912949, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.946929931640625, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02206314168870449, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.285545349121094, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.019932396709918976, "pnorm/_forward_module.model.norm.weight": 31.13481330871582, "gnorm/_forward_module.model.norm.weight": 0.002822037087753415, "pnorm/_forward_module.lm_head.weight": 230.14434814453125, "gnorm/_forward_module.lm_head.weight": 0.02865600772202015} +{"step": 1656750080, "pnorm/_forward_module.model.embeddings.weight": 141.3353729248047, "gnorm/_forward_module.model.embeddings.weight": 0.045767735689878464, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.7523250579834, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0013880071928724647, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.447632789611816, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006260499823838472, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.139086723327637, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006955510936677456, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.76955509185791, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05670694634318352, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.74685287475586, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05645138770341873, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0883209705352783, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004344481974840164, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34299567341804504, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0003865035832859576, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988807678222656, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0008230588282458484, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.994258880615234, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03145426884293556, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.111921310424805, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.03078995831310749, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.225000381469727, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0008509355247952044, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.24043083190918, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0060376618057489395, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.02871036529541, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007183439563959837, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.194581031799316, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03519470617175102, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.280010223388672, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03438444435596466, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3890485763549805, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006133364047855139, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.1641572117805481, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00039407069562003016, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.40087127685547, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007405579672195017, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.736330032348633, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.029081245884299278, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.618013381958008, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02212236076593399, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.690513610839844, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0009631455759517848, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.992246627807617, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.0071508740074932575, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.49371337890625, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.00956418551504612, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.67161750793457, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03050004318356514, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.826481819152832, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.025240924209356308, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.015068292617798, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004594374913722277, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19245830178260803, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00036125193582847714, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59219741821289, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007131033344194293, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.369598388671875, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.026935169473290443, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.096328735351562, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.021306097507476807, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.216447830200195, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0007444787188433111, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.474422454833984, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0047043669037520885, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.841529846191406, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.006039207335561514, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.146206855773926, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02483394369482994, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459602355957031, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02398025430738926, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8566036224365234, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0034781296271830797, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16300326585769653, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00031498042517341673, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.252973556518555, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0010222316486760974, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.824726104736328, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.029246358200907707, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.612455368041992, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03061879612505436, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.138673782348633, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0022622286342084408, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.565945625305176, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.017217691987752914, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.720237731933594, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.02297990396618843, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.301126480102539, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05887453258037567, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.9413480758667, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.01862250827252865, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6752803325653076, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004864828195422888, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15177635848522186, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0003431830264162272, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.295379638671875, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006039082072675228, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.36431884765625, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.021491989493370056, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12837028503418, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.018885577097535133, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.716432571411133, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007775098783895373, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.292031288146973, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006435771472752094, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.592394828796387, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.011443408206105232, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.101969718933105, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.019292263314127922, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.175951957702637, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.013291585259139538, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1689045429229736, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.008386734873056412, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20405954122543335, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009920165175572038, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.27320098876953, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0007301043951883912, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.951282501220703, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.020319920033216476, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.28899383544922, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.018865354359149933, "pnorm/_forward_module.model.norm.weight": 31.158222198486328, "gnorm/_forward_module.model.norm.weight": 0.0028471893165260553, "pnorm/_forward_module.lm_head.weight": 230.23097229003906, "gnorm/_forward_module.lm_head.weight": 0.030748317018151283} +{"step": 1677721600, "pnorm/_forward_module.model.embeddings.weight": 141.3314971923828, "gnorm/_forward_module.model.embeddings.weight": 0.04457836225628853, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.750410079956055, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0013749272329732776, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.453429222106934, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006441161967813969, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.144365310668945, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007166095543652773, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.764918327331543, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05552033334970474, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.742239952087402, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05573180690407753, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.088689088821411, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004349282942712307, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3435988128185272, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00029756189906038344, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988555908203125, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.000785376934800297, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.99349594116211, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03101898916065693, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.111061096191406, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.031812816858291626, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.223703384399414, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0009128207457251847, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.24272632598877, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005706528201699257, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.030427932739258, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.007073861546814442, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.191509246826172, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03498519957065582, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.276944160461426, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.034492623060941696, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3895680904388428, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.006328893825411797, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16418664157390594, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.000504787138197571, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.402450561523438, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007585044368170202, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.73763656616211, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.028566794469952583, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.618927001953125, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02255276031792164, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.691267013549805, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.00100757647305727, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.995866775512695, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007994938641786575, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.496451377868652, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.011083870194852352, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.669782638549805, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.030517227947711945, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.82458209991455, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.025622094050049782, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.016174554824829, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004658149555325508, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19240537285804749, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00031011266401037574, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.593311309814453, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.000734008033759892, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37053871154785, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.026930343359708786, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.097469329833984, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.021812088787555695, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.218647003173828, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0007071401923894882, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.480846405029297, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0044704158790409565, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.84736156463623, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005795625038444996, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.145715713500977, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.025376591831445694, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459341049194336, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.024356767535209656, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8579046726226807, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003279224969446659, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1630816012620926, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002195754204876721, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.25159454345703, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0015127718215808272, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.8217716217041, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03226598724722862, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.611326217651367, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.033500321209430695, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.141538619995117, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0032572660129517317, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.571484565734863, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.023481866344809532, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.72481918334961, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.032515477389097214, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.30329704284668, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.06233000010251999, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.945817947387695, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.01930958963930607, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.675361156463623, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004867882933467627, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15172478556632996, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00037178758066147566, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29483413696289, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006033832323737442, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.363096237182617, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.0221747774630785, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128450393676758, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.019841747358441353, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.72258186340332, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.00096935557667166, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.296998977661133, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008550758473575115, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.596135139465332, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.014956512488424778, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.109275817871094, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.01983051560819149, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.186471939086914, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.013492926955223083, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.170198440551758, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.01723027601838112, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20416401326656342, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.002247962635010481, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.275026321411133, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008263947675004601, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.95429801940918, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.021269390359520912, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.291799545288086, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01933368295431137, "pnorm/_forward_module.model.norm.weight": 31.179662704467773, "gnorm/_forward_module.model.norm.weight": 0.0026700040325522423, "pnorm/_forward_module.lm_head.weight": 230.30889892578125, "gnorm/_forward_module.lm_head.weight": 0.028524892404675484} +{"step": 1698693120, "pnorm/_forward_module.model.embeddings.weight": 141.32713317871094, "gnorm/_forward_module.model.embeddings.weight": 0.04646812379360199, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.748613357543945, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0012708855792880058, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.459004402160645, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006520767230540514, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.149462699890137, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007195473648607731, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.760513305664062, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0557873360812664, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.737835884094238, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05659421160817146, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.088768243789673, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004496078472584486, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34414175152778625, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0002931281633209437, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988605499267578, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007938371854834259, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.993017196655273, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0319770947098732, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.110374450683594, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0311867818236351, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.222606658935547, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0009318734519183636, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.244863510131836, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0059065246023237705, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.032025337219238, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006881259847432375, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.188813209533691, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03593457117676735, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.274312019348145, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03453003242611885, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.389810800552368, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0053470442071557045, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16418497264385223, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00025779896532185376, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.40403938293457, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007064284291118383, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.738962173461914, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.028566382825374603, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.619709014892578, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02226276323199272, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.69227409362793, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0008894866914488375, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 12.999446868896484, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.008246667683124542, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.49921703338623, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.010065573267638683, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.668205261230469, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.03021909110248089, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.82294750213623, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.025197885930538177, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.017127513885498, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.006021092180162668, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19240275025367737, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0004248698242008686, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.594213485717773, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007393914856947958, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.371129989624023, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.027172435075044632, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.09836196899414, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0211710873991251, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.22066307067871, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0007337935385294259, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.48641300201416, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0044356151483953, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.852331161499023, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005689945537596941, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.145352363586426, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02573395147919655, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.459196090698242, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02443138137459755, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8589794635772705, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0029780438635498285, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16310212016105652, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00014836432819720358, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.250167846679688, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014407476410269737, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.818742752075195, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03152673318982124, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.61000633239746, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.03071190044283867, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.14417266845703, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0025847710203379393, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.576818466186523, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02043353207409382, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.729241371154785, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.02806456759572029, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.305159568786621, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.058703966438770294, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.949634552001953, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.019056010991334915, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.675274133682251, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.003863073419779539, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1516776829957962, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.000314348260872066, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29477882385254, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0006044832989573479, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.362510681152344, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.021860146895051003, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12875747680664, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.018365688621997833, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.728137969970703, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007394634885713458, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.301671028137207, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.005165171343833208, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.599649429321289, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.008052808232605457, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.115818977355957, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.019191857427358627, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.195630073547363, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.013294970616698265, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.171499252319336, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005738761741667986, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2043353170156479, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0003008887288160622, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.276885986328125, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008119405247271061, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.95726203918457, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.02144782431423664, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.294404983520508, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01995152235031128, "pnorm/_forward_module.model.norm.weight": 31.199254989624023, "gnorm/_forward_module.model.norm.weight": 0.0031494402792304754, "pnorm/_forward_module.lm_head.weight": 230.37979125976562, "gnorm/_forward_module.lm_head.weight": 0.030216289684176445} +{"step": 1719664640, "pnorm/_forward_module.model.embeddings.weight": 141.3224639892578, "gnorm/_forward_module.model.embeddings.weight": 0.044098444283008575, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.747051239013672, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0012050755321979523, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.46363353729248, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006373214069753885, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.153785705566406, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.007084324024617672, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.756646156311035, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05301236733794212, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.734058380126953, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05417168140411377, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08933687210083, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004998547490686178, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34481239318847656, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0001902189542306587, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98849105834961, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007446189993061125, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.992311477661133, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.030483612790703773, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.109601974487305, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02915780059993267, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.221437454223633, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0008856907952576876, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.246373176574707, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005490125622600317, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.033134460449219, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.00665226299315691, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.186223983764648, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03365691006183624, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.271739959716797, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03310658410191536, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3902347087860107, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005848998203873634, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16427794098854065, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003572382847778499, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.405744552612305, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00073169672396034, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.7403564453125, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.027907395735383034, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.62066650390625, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.021249011158943176, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.692989349365234, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0008750006672926247, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.002433776855469, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006893828045576811, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.501418113708496, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.009311062283813953, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.666671752929688, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.028285710141062737, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.82129192352295, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.024083560332655907, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.017808437347412, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004369791597127914, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.1924019753932953, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00036636408185586333, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59502601623535, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007044600788503885, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.371519088745117, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02605457417666912, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.09892463684082, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.020455820485949516, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.222238540649414, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.000696919800247997, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.49091625213623, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00445895828306675, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.856532096862793, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005739685148000717, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.144901275634766, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.024114608764648438, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.45893383026123, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.022920668125152588, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8596014976501465, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00343241891823709, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16310960054397583, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0003306750732008368, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24860191345215, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014910208992660046, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.81548309326172, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.0316833071410656, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.608409881591797, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.029338061809539795, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.146799087524414, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0025683606509119272, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.58143138885498, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02432643249630928, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.733238220214844, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03307809680700302, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.306782722473145, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05167650058865547, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.953105926513672, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.017786890268325806, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.675560712814331, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0036644453648477793, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.151668980717659, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002986992767546326, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.294414520263672, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005617919377982616, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.361539840698242, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.020790787413716316, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12881851196289, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.018145909532904625, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.733657836914062, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007155701168812811, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.305914878845215, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006153097841888666, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.60292911529541, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01006747130304575, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.122344970703125, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.01790950819849968, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.204632759094238, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.012694740667939186, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.172700881958008, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.006039721891283989, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20442448556423187, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006556878797709942, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.278366088867188, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008072779164649546, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.959609985351562, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.021046575158834457, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.29648780822754, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.019490491598844528, "pnorm/_forward_module.model.norm.weight": 31.217206954956055, "gnorm/_forward_module.model.norm.weight": 0.0028552778530865908, "pnorm/_forward_module.lm_head.weight": 230.44320678710938, "gnorm/_forward_module.lm_head.weight": 0.027377966791391373} +{"step": 1740636160, "pnorm/_forward_module.model.embeddings.weight": 141.31765747070312, "gnorm/_forward_module.model.embeddings.weight": 0.04374431073665619, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.74544334411621, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0013479077024385333, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.46805477142334, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005862601101398468, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.157923698425293, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006532947067171335, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.752814292907715, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05464436486363411, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.730278968811035, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05402659252285957, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0894007682800293, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004084280226379633, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3452194631099701, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0003912253596354276, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98843765258789, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007420337060466409, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.991670608520508, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.030670249834656715, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.108970642089844, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02975250594317913, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.220565795898438, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0008868594304658473, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.248115539550781, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005454434081912041, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.03450870513916, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006727287080138922, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.184011459350586, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03511591628193855, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.269489288330078, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03310489282011986, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3905839920043945, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00554592814296484, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16425105929374695, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00040611528675071895, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.40719223022461, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007507610716857016, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.741426467895508, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02812054753303528, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.621349334716797, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02127191238105297, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.69375228881836, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0010065066162496805, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.005029678344727, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007367710582911968, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.503368377685547, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.010648677125573158, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.665535926818848, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.029349099844694138, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.820107460021973, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.024054497480392456, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0178894996643066, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004566058050841093, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19230496883392334, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000343720632372424, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59613609313965, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007002403144724667, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37226676940918, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02574189007282257, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.099742889404297, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02016708068549633, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.223827362060547, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.000676943629514426, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.495512962341309, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004287391435354948, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.860774040222168, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005470627918839455, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14437198638916, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.024206699803471565, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.45868968963623, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02324538119137287, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.860445737838745, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003133704187348485, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16309083998203278, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0003186201793141663, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.247282028198242, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001129628042690456, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.81267547607422, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.028515703976154327, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.60700225830078, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.028810465708374977, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.149188995361328, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0022156876511871815, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.585615158081055, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.017374495044350624, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.736748695373535, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.021080663427710533, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.308297157287598, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.0555841401219368, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.956136703491211, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.018375184386968613, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6758663654327393, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0045777298510074615, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1516610085964203, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00037659023655578494, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.294111251831055, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005856971838511527, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.360551834106445, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.02170778624713421, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12883186340332, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.019352290779352188, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.738540649414062, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007952895830385387, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.310104370117188, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.008078871294856071, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.606133460998535, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.014348835684359074, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.127949714660645, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.01940801925957203, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.212409973144531, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.013155367225408554, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.173522710800171, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.013458347879350185, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20451031625270844, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0018207915127277374, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.279876708984375, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0007451485143974423, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.961753845214844, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.020600268617272377, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.298538208007812, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.018785132095217705, "pnorm/_forward_module.model.norm.weight": 31.233287811279297, "gnorm/_forward_module.model.norm.weight": 0.002860505599528551, "pnorm/_forward_module.lm_head.weight": 230.4984130859375, "gnorm/_forward_module.lm_head.weight": 0.02886488474905491} +{"step": 1761607680, "pnorm/_forward_module.model.embeddings.weight": 141.3128662109375, "gnorm/_forward_module.model.embeddings.weight": 0.04242623597383499, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.74394989013672, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001239114673808217, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.47174072265625, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005935885943472385, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.16130256652832, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006417996250092983, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.749356269836426, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.050880927592515945, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.726871490478516, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.052323974668979645, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0895023345947266, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.003955993801355362, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34565597772598267, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00024125447089318186, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.9882869720459, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007192182238213718, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.99089241027832, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.028875108808279037, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.1082763671875, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.026964064687490463, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.219635009765625, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007898774347268045, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.249699592590332, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005372361745685339, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.035743713378906, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.00637369928881526, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.18175220489502, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.032032690942287445, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.267245292663574, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03125397861003876, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.390927791595459, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005169576033949852, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16434097290039062, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0002584047324489802, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.408485412597656, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00069802301004529, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.74228286743164, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02700229361653328, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.62183380126953, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.020014960318803787, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.69439697265625, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0008538188994862139, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.007386207580566, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006125689018517733, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.50515365600586, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.008358290418982506, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.664336204528809, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.02842821180820465, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.818830490112305, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.022983407601714134, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.018373966217041, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.005251115653663874, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19233326613903046, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00039881363045424223, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59699821472168, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0006587339448742568, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.372703552246094, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.024479229003190994, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.100383758544922, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.01867910660803318, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.225322723388672, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.000741486088372767, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.499319076538086, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004223366267979145, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.864100456237793, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005441361106932163, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.144010543823242, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02334924228489399, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.458531379699707, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02196965366601944, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8614375591278076, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003326319856569171, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16316543519496918, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0003399289562366903, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.246122360229492, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0013873933348804712, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.81016731262207, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.029685668647289276, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.605783462524414, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.024066831916570663, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.151409149169922, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0023149061016738415, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.589319229125977, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02264171838760376, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.739851951599121, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.030083751305937767, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.309725761413574, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03906789422035217, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.958964347839355, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.01659349910914898, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.676025390625, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00397244468331337, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1516624093055725, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002968880580738187, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29405403137207, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005444154376164079, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35979652404785, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.019778765738010406, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128908157348633, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.016906000673770905, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.742839813232422, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0007065861136652529, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.3133544921875, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006025639362633228, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.608555793762207, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.009512268006801605, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.133191108703613, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.01563129760324955, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.219612121582031, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.011841128580272198, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.174015522003174, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007425522431731224, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20457735657691956, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0008805630495771766, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28134536743164, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0006317324587143958, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.963899612426758, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.018571754917502403, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30035972595215, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.016866568475961685, "pnorm/_forward_module.model.norm.weight": 31.247705459594727, "gnorm/_forward_module.model.norm.weight": 0.0028041834011673927, "pnorm/_forward_module.lm_head.weight": 230.54808044433594, "gnorm/_forward_module.lm_head.weight": 0.024733761325478554} +{"step": 1782579200, "pnorm/_forward_module.model.embeddings.weight": 141.30825805664062, "gnorm/_forward_module.model.embeddings.weight": 0.04376252368092537, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.742685317993164, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0012898995773866773, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.475171089172363, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.006154044531285763, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.164497375488281, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.00683760829269886, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.746288299560547, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05268286541104317, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.723827362060547, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05299519747495651, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0894336700439453, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004461096134036779, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3459966778755188, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00040403963066637516, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988378524780273, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007294489769265056, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.990398406982422, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.03046494722366333, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.107681274414062, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.029264362528920174, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.21891212463379, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0008650976233184338, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.2510986328125, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005510939750820398, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.036839485168457, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006578164640814066, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.179903030395508, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03303442522883415, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.265384674072266, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0322522334754467, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3908510208129883, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004886684473603964, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16423611342906952, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00032275827834382653, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.409650802612305, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.00073054718086496, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.743005752563477, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02738231234252453, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.622270584106445, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.020985959097743034, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.694520950317383, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0007787040085531771, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.00904369354248, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007099831011146307, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.50649642944336, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.008473278023302555, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.662870407104492, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.028002368286252022, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.81733512878418, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.024197222664952278, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.01823353767395, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0048216478899121284, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19219885766506195, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0004495200701057911, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.597875595092773, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.000701993121765554, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37311553955078, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.025777896866202354, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.100955963134766, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0201650932431221, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.226829528808594, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006988884997554123, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.503414154052734, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0042933207005262375, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.867803573608398, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005476228892803192, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.143648147583008, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.023684924468398094, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.458301544189453, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02343611977994442, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8619906902313232, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00287159183062613, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1631055772304535, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00021930279035586864, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24517059326172, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0013145327102392912, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.808008193969727, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.030892981216311455, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.604907989501953, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.028328049927949905, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.153366088867188, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0021365832071751356, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.59267807006836, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.01854988932609558, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.742753028869629, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.023898480460047722, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.310995101928711, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05037117749452591, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.96133804321289, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.017826072871685028, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6760201454162598, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004757072776556015, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.151624396443367, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002623242326080799, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.293542861938477, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005538974655792117, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35841178894043, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.020045241340994835, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128637313842773, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.017198530957102776, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.746788024902344, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.000712825043592602, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.31606388092041, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004861199297010899, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.61056137084961, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006968318950384855, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.13782787322998, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.01784580387175083, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.225886344909668, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.012429581955075264, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1749846935272217, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.004118995275348425, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2047228366136551, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0002863032859750092, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.282434463500977, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0006591529818251729, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.96518898010254, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.019008895382285118, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.301733016967773, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.017291050404310226, "pnorm/_forward_module.model.norm.weight": 31.260488510131836, "gnorm/_forward_module.model.norm.weight": 0.0027645945083349943, "pnorm/_forward_module.lm_head.weight": 230.59144592285156, "gnorm/_forward_module.lm_head.weight": 0.026357533410191536} +{"step": 1803550720, "pnorm/_forward_module.model.embeddings.weight": 141.3038330078125, "gnorm/_forward_module.model.embeddings.weight": 0.0414767861366272, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.741273880004883, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001209333073347807, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.47747802734375, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005521667655557394, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.166640281677246, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006047818344086409, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.74338436126709, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.05135146528482437, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.720975875854492, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.052284158766269684, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0898168087005615, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0038229150231927633, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34645915031433105, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00041592129855416715, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988435745239258, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007449144031852484, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.989892959594727, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.02912786602973938, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.107118606567383, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02923515997827053, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.2181339263916, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007790939998812973, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.25199031829834, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005341562442481518, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.037497520446777, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0063261790201067924, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.178116798400879, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.032515715807676315, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.26358699798584, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.03166607394814491, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.391305446624756, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005807955749332905, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16431716084480286, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0004574889608193189, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.410547256469727, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007178762461990118, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.743457794189453, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.027170995250344276, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.622526168823242, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.020978420972824097, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.695068359375, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.000827554555144161, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.010787010192871, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00621257396414876, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.507866859436035, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.008370630443096161, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.662008285522461, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.028891421854496002, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.816421508789062, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.022995654493570328, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0182981491088867, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004362782929092646, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19215358793735504, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0002743955119512975, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.598468780517578, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007299768622033298, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373193740844727, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.025586159899830818, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.101261138916016, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.02031632326543331, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.228086471557617, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.00069015211192891, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.506431579589844, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004302767105400562, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.870383262634277, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005728777032345533, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14344310760498, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.023542020469903946, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.458202362060547, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02252981811761856, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.86248779296875, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.004104613326489925, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16313791275024414, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00043242922401987016, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.244272232055664, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.002046521520242095, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.805944442749023, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03385389596223831, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.60397720336914, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.032552123069763184, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.155214309692383, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.003344587981700897, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.59568977355957, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.029748469591140747, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.7452974319458, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.04264324530959129, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.31204605102539, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.05294265225529671, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.963422775268555, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.017350468784570694, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.676112651824951, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004702881909906864, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15162453055381775, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0005959529662504792, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29327964782715, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000562163710128516, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.357379913330078, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.020424989983439445, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128437042236328, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.017379822209477425, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.750246047973633, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0006959867314435542, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.318262100219727, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.006178280338644981, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.612203598022461, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.009032190777361393, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.142003059387207, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.017475394532084465, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.231471061706543, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.012112557888031006, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1755242347717285, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.011636406183242798, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20474447309970856, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0010570964077487588, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.283708572387695, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0008194710244424641, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.966854095458984, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.019648918882012367, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.303136825561523, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01824212074279785, "pnorm/_forward_module.model.norm.weight": 31.27166748046875, "gnorm/_forward_module.model.norm.weight": 0.0029304521158337593, "pnorm/_forward_module.lm_head.weight": 230.62892150878906, "gnorm/_forward_module.lm_head.weight": 0.02676154486835003} +{"step": 1824522240, "pnorm/_forward_module.model.embeddings.weight": 141.2996826171875, "gnorm/_forward_module.model.embeddings.weight": 0.03983566537499428, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.740142822265625, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.001100412686355412, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.479997634887695, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005553606431931257, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.169002532958984, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.00610389607027173, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.740812301635742, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04693936929106712, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.718417167663574, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04896404966711998, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.089797258377075, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.003791254013776779, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3467184007167816, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0003196002508047968, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98870849609375, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006725366110913455, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.9896240234375, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.027348315343260765, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.106740951538086, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02541610784828663, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.217464447021484, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007439829641953111, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.25283432006836, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004918637219816446, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.038154602050781, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005676165223121643, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.176529884338379, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.029481803998351097, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.261994361877441, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.029859626665711403, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3915863037109375, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00465731043368578, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16434034705162048, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00025714622461237013, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.41158103942871, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006501044845208526, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.744081497192383, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.025628097355365753, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.622915267944336, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.019386712461709976, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.695505142211914, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0008713708375580609, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.012391090393066, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007118835113942623, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.509132385253906, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.010006715543568134, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.661165237426758, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.025744199752807617, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.815516471862793, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.022235898301005363, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0186009407043457, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004147120285779238, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19214430451393127, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000302297092275694, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.599018096923828, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0006662093219347298, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373287200927734, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.024263620376586914, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.10149383544922, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.018454954028129578, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.22920799255371, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006150074768811464, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.509422302246094, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004191779065877199, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.873003005981445, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005198404658585787, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.143115043640137, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02076118439435959, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457987785339355, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.0214578527957201, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.86264967918396, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003476841142401099, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16308249533176422, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00029747968073934317, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.243576049804688, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0016574787441641092, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.80425453186035, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.03159962221980095, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.603221893310547, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.025302501395344734, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.15671157836914, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0026982370764017105, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.59824275970459, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.027324222028255463, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.747446060180664, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03339790180325508, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.312973022460938, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03763935714960098, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.96514892578125, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.016039595007896423, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6761817932128906, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.005279638338834047, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15164099633693695, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.000579073210246861, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29310417175293, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005118696135468781, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.356529235839844, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.019656851887702942, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12828826904297, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.017070874571800232, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.753463745117188, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0006981016485951841, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.320535659790039, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.005933473352342844, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.613935470581055, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.01071224082261324, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.145620346069336, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.015939878299832344, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.236308097839355, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.011309727095067501, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1758480072021484, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007823674939572811, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20476634800434113, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0010428635869175196, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.284658432006836, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0006302759284153581, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.967975616455078, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.018343886360526085, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30419921875, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01688198558986187, "pnorm/_forward_module.model.norm.weight": 31.281511306762695, "gnorm/_forward_module.model.norm.weight": 0.003244763473048806, "pnorm/_forward_module.lm_head.weight": 230.66163635253906, "gnorm/_forward_module.lm_head.weight": 0.024462036788463593} +{"step": 1845493760, "pnorm/_forward_module.model.embeddings.weight": 141.29591369628906, "gnorm/_forward_module.model.embeddings.weight": 0.04256848618388176, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.739164352416992, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0012569489190354943, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.482110977172852, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005833090748637915, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.17095947265625, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006384486798197031, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.738602638244629, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.0498042032122612, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.716216087341309, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.051699042320251465, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0897762775421143, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.00398486852645874, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.346973180770874, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.000329759088344872, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988679885864258, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007758596329949796, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.989097595214844, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.029185757040977478, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.10626792907715, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02868049591779709, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.216999053955078, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0008192642708308995, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.25362777709961, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005452392622828484, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.038776397705078, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006461069453507662, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.175283432006836, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.030755365267395973, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.26074504852295, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.030430717393755913, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3916091918945312, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0062323445454239845, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16433653235435486, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003037193964701146, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.412553787231445, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0007618989911861718, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.744661331176758, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.027392804622650146, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.62322998046875, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.02108249068260193, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.695934295654297, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0013919677585363388, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.013832092285156, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.011270029470324516, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.51021957397461, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.01747334562242031, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.66041374206543, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.029461365193128586, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.814711570739746, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.021980686113238335, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.018744468688965, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0041197980754077435, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19211970269680023, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0003610026615206152, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.59978485107422, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0006461621378548443, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37360954284668, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02411271259188652, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.101869583129883, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.018238037824630737, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.23008155822754, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006399019621312618, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.51136302947998, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004264742136001587, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.874751091003418, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005513256415724754, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.142865180969238, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.020797323435544968, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457807540893555, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02090618759393692, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.863379955291748, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0032838196493685246, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1631428599357605, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0003411423822399229, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.242813110351562, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001079728128388524, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.802478790283203, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.027443913742899895, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.60231590270996, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.02244594134390354, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.158018112182617, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0018748992588371038, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.600379943847656, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.016183866187930107, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.749288558959961, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.020664149895310402, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.313770294189453, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03889453783631325, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.966736793518066, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.015846993774175644, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.676079273223877, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.003405660390853882, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15160751342773438, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002514254301786423, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292890548706055, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005343262455426157, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.355710983276367, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.019120758399367332, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.128087997436523, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.016291480511426926, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.756385803222656, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0005700993933714926, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.32247543334961, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00509980320930481, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.615431785583496, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.007201238069683313, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.148877143859863, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.014445850625634193, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.24058723449707, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.011308695189654827, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1764941215515137, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005021743942052126, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20482541620731354, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0003753769560717046, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.285297393798828, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005924890865571797, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.968677520751953, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01789184845983982, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.304946899414062, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.016436470672488213, "pnorm/_forward_module.model.norm.weight": 31.289995193481445, "gnorm/_forward_module.model.norm.weight": 0.0026825186796486378, "pnorm/_forward_module.lm_head.weight": 230.68954467773438, "gnorm/_forward_module.lm_head.weight": 0.024597734212875366} +{"step": 1866465280, "pnorm/_forward_module.model.embeddings.weight": 141.29249572753906, "gnorm/_forward_module.model.embeddings.weight": 0.04143253341317177, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.73828125, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0011583642335608602, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.48376750946045, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005803941283375025, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.172517776489258, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.0064353663474321365, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.736664772033691, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04923977330327034, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.714289665222168, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.05024658143520355, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08979868888855, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.004187328740954399, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3471689224243164, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0003325626312289387, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98887825012207, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0007046294049359858, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.988815307617188, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.028525685891509056, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.10591697692871, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02677803859114647, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.216543197631836, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007926668040454388, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.254283905029297, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005194379482418299, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.039275169372559, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.006216716021299362, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.17410945892334, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.03069375827908516, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.259580612182617, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.029683172702789307, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.391710042953491, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.0051818182691931725, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16435779631137848, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003182909858878702, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.413387298583984, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006940275197848678, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.745153427124023, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02619919367134571, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.62350845336914, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.01955167017877102, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.69618797302246, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0007661737618036568, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.014897346496582, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00594059843569994, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.511032104492188, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.007651661057025194, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.659655570983887, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.025954559445381165, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.813899040222168, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.021725405007600784, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.018852949142456, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004460211843252182, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19212377071380615, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.000406297214794904, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.600383758544922, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0007003470091149211, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373781204223633, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.023987844586372375, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102121353149414, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.018457001075148582, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.230846405029297, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0006039486033841968, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.5131254196167, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004225557669997215, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.87633991241455, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005305594764649868, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.142632484436035, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.022110039368271828, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457642555236816, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.021136952564120293, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.863755226135254, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003206930821761489, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16314299404621124, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00018158108287025243, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24213409423828, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0015581885818392038, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.800901412963867, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02986026741564274, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.60154914855957, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.025984356179833412, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.159168243408203, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0024949251674115658, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.602022171020508, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.024622179567813873, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.750696182250977, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.033959612250328064, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.31443977355957, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.04188300669193268, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.968058586120605, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.016020121052861214, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6758313179016113, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0033554525580257177, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15156374871730804, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00019093253649771214, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292654037475586, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0004909314448013902, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35492706298828, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.01900440640747547, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12788963317871, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.016400251537561417, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.758869171142578, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0006179916090331972, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.32396125793457, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00536680594086647, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.616583824157715, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.009223243221640587, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.151694297790527, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.015306190587580204, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.24427318572998, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.011264491826295853, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.17695689201355, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.007859851233661175, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2048693150281906, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0009771387558430433, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.286069869995117, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0006112185074016452, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.969511032104492, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.017907897010445595, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30577850341797, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.01636006310582161, "pnorm/_forward_module.model.norm.weight": 31.297231674194336, "gnorm/_forward_module.model.norm.weight": 0.0025962721556425095, "pnorm/_forward_module.lm_head.weight": 230.71287536621094, "gnorm/_forward_module.lm_head.weight": 0.024461310356855392} +{"step": 1887436800, "pnorm/_forward_module.model.embeddings.weight": 141.2894744873047, "gnorm/_forward_module.model.embeddings.weight": 0.038410086184740067, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.737607955932617, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0011010556481778622, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.485292434692383, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005399423651397228, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.173933029174805, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005866445135325193, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.735048294067383, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04606407880783081, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.712690353393555, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04670298099517822, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.089796543121338, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0035340816248208284, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34733110666275024, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0001416487357346341, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.988910675048828, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006837777909822762, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.988420486450195, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.0268191359937191, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.105558395385742, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.023748766630887985, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.21605110168457, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006628381670452654, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.254873275756836, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004872964695096016, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.039706230163574, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005515686701983213, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.17297649383545, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.02758113667368889, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.258466720581055, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.02822374925017357, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3916966915130615, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.00439113425090909, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16435880959033966, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0003131573321297765, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.41415786743164, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006192057044245303, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.745594024658203, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.024236712604761124, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.623741149902344, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.018130887299776077, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.696517944335938, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0006528875092044473, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.015909194946289, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005523327738046646, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.51181411743164, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.006866606418043375, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.659058570861816, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.02358577772974968, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.813279151916504, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.02105342596769333, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.018808126449585, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0038811315316706896, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19207394123077393, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0002908289898186922, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.60086441040039, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005657877190969884, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373884201049805, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.022515997290611267, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102298736572266, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0172084029763937, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.231571197509766, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005581005825661123, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.514734268188477, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.003920732066035271, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.877795219421387, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004956468939781189, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.142410278320312, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.019370459020137787, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457494735717773, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.020080238580703735, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8643200397491455, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.00269776931963861, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16318635642528534, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002130301872966811, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24159049987793, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0009428830235265195, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.79960823059082, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.025329465046525, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.600929260253906, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.02013438008725643, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.160198211669922, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.001545641804113984, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.603516578674316, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.01484636589884758, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.751952171325684, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.018429063260555267, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.315053939819336, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.03303760290145874, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.969220161437988, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.015426358208060265, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6757097244262695, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.004543732386082411, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15154647827148438, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0005338139017112553, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292417526245117, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.00047669338528066874, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.354211807250977, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.018476102501153946, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127685546875, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.015696680173277855, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.760955810546875, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0005286791711114347, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.325220108032227, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004479008261114359, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.61754322052002, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006850536447018385, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.154020309448242, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.014215657487511635, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.247294425964355, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.010903509333729744, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.177316904067993, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005301160272210836, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2049180567264557, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0006440123543143272, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.286828994750977, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005753975710831583, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.970354080200195, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01720154844224453, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.306499481201172, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015896346420049667, "pnorm/_forward_module.model.norm.weight": 31.303287506103516, "gnorm/_forward_module.model.norm.weight": 0.0029082116670906544, "pnorm/_forward_module.lm_head.weight": 230.7323455810547, "gnorm/_forward_module.lm_head.weight": 0.024354036897420883} +{"step": 1908408320, "pnorm/_forward_module.model.embeddings.weight": 141.28689575195312, "gnorm/_forward_module.model.embeddings.weight": 0.03953370824456215, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.736919403076172, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0010687484173104167, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.486412048339844, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005557571072131395, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.174970626831055, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006145953666418791, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.733595848083496, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.046261388808488846, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.71125602722168, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.047859352082014084, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0898783206939697, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.003532474162057042, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34751713275909424, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00020237077842466533, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98918914794922, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006750530446879566, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.988357543945312, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.02735157310962677, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.105392456054688, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.025260422378778458, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.215755462646484, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007280920981429517, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.255487442016602, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.005033534485846758, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.040183067321777, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005853482987731695, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.172103881835938, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.028949473053216934, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.257623672485352, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.028736654669046402, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3917250633239746, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.005348841194063425, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.164349764585495, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0005423775292001665, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.41475486755371, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006507952348329127, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.745882034301758, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.024886304512619972, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.623926162719727, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.019078295677900314, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.696744918823242, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0006928078946657479, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.016664505004883, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005864372942596674, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.512378692626953, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.007441548630595207, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.658556938171387, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.025273337960243225, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.812748908996582, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.021473677828907967, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.018864154815674, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.003971713595092297, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19201518595218658, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0002042171108769253, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.601215362548828, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0006280054803937674, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.3738956451416, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02355886809527874, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102399826049805, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.01826070249080658, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.232011795043945, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005944207077845931, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.515812873840332, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004173987545073032, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.878778457641602, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005175002850592136, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14216136932373, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.02065034955739975, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.45731258392334, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.020719099789857864, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8645925521850586, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.002876298502087593, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1632053554058075, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00019709061598405242, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24113655090332, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.001564873498864472, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.798513412475586, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02952931821346283, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.600412368774414, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.024951167404651642, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.161096572875977, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.002608047565445304, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.604578971862793, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.023041626438498497, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.752826690673828, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.03262857347726822, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.315567016601562, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.039842505007982254, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.970245361328125, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.01586170867085457, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6755521297454834, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0037819352000951767, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1514965146780014, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00025390394148416817, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292238235473633, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005727115203626454, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.353572845458984, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.018960289657115936, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127498626708984, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.01617366075515747, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.762779235839844, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0006487798527814448, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.32638168334961, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.005244255065917969, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.61844253540039, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.007965208031237125, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.156042098999023, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.014728873036801815, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.249842643737793, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01116686686873436, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.177598714828491, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.005455356556922197, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2049495428800583, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.0005766593967564404, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28728485107422, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0006553585990332067, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.970773696899414, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01754908263683319, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30698013305664, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015997515991330147, "pnorm/_forward_module.model.norm.weight": 31.308271408081055, "gnorm/_forward_module.model.norm.weight": 0.002568097785115242, "pnorm/_forward_module.lm_head.weight": 230.74826049804688, "gnorm/_forward_module.lm_head.weight": 0.023320289328694344} +{"step": 1929379840, "pnorm/_forward_module.model.embeddings.weight": 141.28472900390625, "gnorm/_forward_module.model.embeddings.weight": 0.037811875343322754, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.736412048339844, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0010695364326238632, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.487442016601562, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005584117956459522, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.175923347473145, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006035377737134695, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.732439041137695, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.043960414826869965, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.710112571716309, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04526115953922272, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.089857816696167, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0034811983350664377, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34762677550315857, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00034065634827129543, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989423751831055, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.000641494058072567, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.9882869720459, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.02616344392299652, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.105236053466797, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.024222970008850098, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.215423583984375, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0007281634025275707, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.255863189697266, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0048576341941952705, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.040472984313965, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005774990655481815, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.171311378479004, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.02652198076248169, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.256855964660645, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.027268853038549423, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3918092250823975, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004387391731142998, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16434498131275177, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00016440726176369935, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.415210723876953, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0005874583730474114, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.746061325073242, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.023594066500663757, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624046325683594, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.018327362835407257, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.696901321411133, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.000649038702249527, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.017169952392578, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005972040351480246, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.512776374816895, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.007127071265131235, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.658122062683105, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.021895375102758408, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.812298774719238, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.020614417269825935, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0190632343292236, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0037377369590103626, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19200140237808228, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00025559987989254296, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.601449966430664, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005497109959833324, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373836517333984, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.022584570571780205, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102441787719727, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.017512718215584755, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.23256492614746, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005541777354665101, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.517050743103027, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0041975416243076324, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.879876136779785, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.005072102416306734, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.142014503479004, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.019333001226186752, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457207679748535, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.02001214772462845, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8648784160614014, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003437439911067486, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16320912539958954, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0003130416735075414, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.240787506103516, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0012160739861428738, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.79766845703125, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02752852626144886, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.600019454956055, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.022973135113716125, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.161802291870117, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0021456927061080933, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.605635643005371, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.019093429669737816, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.753713607788086, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.024967225268483162, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.31593132019043, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.0371691957116127, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.970969200134277, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.015183072537183762, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6753432750701904, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0033712005242705345, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15146468579769135, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00035670027136802673, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29214096069336, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0005273482529446483, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.353111267089844, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.018417170271277428, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127351760864258, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.015371647663414478, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.764217376708984, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0005956166423857212, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.32723617553711, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004296624101698399, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.619091987609863, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006064938846975565, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.157685279846191, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.013773739337921143, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.251908302307129, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.010714484378695488, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1777138710021973, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0029773779679089785, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2049621194601059, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.000202669674763456, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28781509399414, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005478968378156424, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.97138786315918, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.016816968098282814, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.307498931884766, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015443393960595131, "pnorm/_forward_module.model.norm.weight": 31.3122501373291, "gnorm/_forward_module.model.norm.weight": 0.002716219983994961, "pnorm/_forward_module.lm_head.weight": 230.76097106933594, "gnorm/_forward_module.lm_head.weight": 0.022944768890738487} +{"step": 1950351360, "pnorm/_forward_module.model.embeddings.weight": 141.28298950195312, "gnorm/_forward_module.model.embeddings.weight": 0.03689458593726158, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.735950469970703, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0010095755569636822, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.488175392150879, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005546994041651487, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.176591873168945, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.006071293260902166, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.731477737426758, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04286693409085274, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.709162712097168, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04412868618965149, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08988881111145, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0038663099985569715, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.347726970911026, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00024316352210007608, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989521026611328, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006276473286561668, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.988122940063477, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.025196384638547897, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.10506248474121, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.023078493773937225, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.215198516845703, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006384507869370282, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.256060600280762, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.0046850242651999, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.04061222076416, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.00541264284402132, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.170764923095703, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.025776326656341553, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.256327629089355, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.026669232174754143, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3919599056243896, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004272155463695526, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16438321769237518, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0002818430948536843, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.415552139282227, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0005980193964205682, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.74616813659668, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.023367585614323616, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624099731445312, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.017882652580738068, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.69710922241211, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0006666852859780192, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.017690658569336, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.006162821315228939, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.51318645477295, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.007843746803700924, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657795906066895, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.021849652752280235, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.81196403503418, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.019917141646146774, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.019361972808838, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0032481651287525892, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19201330840587616, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00016115013568196446, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.601716995239258, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005583156598731875, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37386703491211, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02190527319908142, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.1025333404541, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.0167732834815979, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.232892990112305, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005361203802749515, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.517765998840332, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0038794102147221565, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.880512237548828, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004696629010140896, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.141852378845215, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.018421823158860207, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.457071304321289, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.019099151715636253, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8651885986328125, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0028199946973472834, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16323734819889069, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002422324614599347, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.24048614501953, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0014422446256503463, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.796964645385742, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02793426252901554, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.59967041015625, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.021238919347524643, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.162437438964844, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0022198562510311604, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.606456756591797, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.02255948632955551, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.75438404083252, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.0318065844476223, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.3162841796875, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.026596615090966225, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.97163200378418, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.014549219980835915, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.675316095352173, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.00385885126888752, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15145714581012726, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00024153859703801572, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.292057037353516, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000486671895487234, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35272216796875, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.017901292070746422, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.12723159790039, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.015091931447386742, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.76534652709961, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0006068425718694925, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.327873229980469, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004559563938528299, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.61958122253418, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006666353903710842, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.158982276916504, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.013149024918675423, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.253510475158691, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.01026394497603178, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.177889347076416, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.003234145464375615, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2049812525510788, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00021502295567188412, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28818130493164, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005247893859632313, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.971681594848633, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.016296198591589928, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.307863235473633, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.014790337532758713, "pnorm/_forward_module.model.norm.weight": 31.315364837646484, "gnorm/_forward_module.model.norm.weight": 0.0026170366909354925, "pnorm/_forward_module.lm_head.weight": 230.7705535888672, "gnorm/_forward_module.lm_head.weight": 0.022315412759780884} +{"step": 1971322880, "pnorm/_forward_module.model.embeddings.weight": 141.28163146972656, "gnorm/_forward_module.model.embeddings.weight": 0.03714624419808388, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.73560333251953, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0009769797325134277, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.488700866699219, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005454860161989927, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.177069664001465, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005985437426716089, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.73074722290039, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04250827431678772, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.708444595336914, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.043414708226919174, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08992075920105, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.003588038496673107, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3478126525878906, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00031518060131929815, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98958969116211, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006114484858699143, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.987985610961914, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.025437239557504654, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.104928970336914, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.021480852738022804, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.215068817138672, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006723285769112408, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.256372451782227, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004718279466032982, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.040858268737793, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005478980485349894, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.170361518859863, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.024720942601561546, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.255938529968262, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.0254165381193161, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.391916513442993, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004966350272297859, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16435495018959045, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0002777695772238076, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.415870666503906, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0005437562358565629, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.74631690979004, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02275421842932701, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624197006225586, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.01697017252445221, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.697269439697266, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0005996290710754693, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.018145561218262, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.00539540546014905, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.51353645324707, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.006449875887483358, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657549858093262, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.020065657794475555, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.81170654296875, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.019062740728259087, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.019453763961792, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.004369224887341261, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19201131165027618, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.0002644858614075929, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.601911544799805, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005706973606720567, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373882293701172, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02140578255057335, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102596282958984, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.016052158549427986, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.233156204223633, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005129198543727398, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.518380165100098, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0039009128231555223, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.881067276000977, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.0048246318474411964, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.141744613647461, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.017115961760282516, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456989288330078, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.01855389028787613, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8652546405792236, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0027182616759091616, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16322094202041626, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00014290498802438378, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.240278244018555, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0008843920659273863, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.796438217163086, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.024233359843492508, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.599416732788086, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.017298690974712372, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.162927627563477, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0013226321898400784, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.607057571411133, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.013760835863649845, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.754888534545898, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.01792994514107704, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.316516876220703, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.025602849200367928, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.972087860107422, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.014185556210577488, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6753926277160645, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0030774488113820553, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15146328508853912, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00019492160936351866, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.291988372802734, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0004489283310249448, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.352413177490234, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.017554273828864098, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127134323120117, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.01489537674933672, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.76619529724121, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0005386365228332579, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.328340530395508, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004195576999336481, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.619928359985352, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006041112821549177, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.15998649597168, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.012838100083172321, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.25474739074707, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.010215546935796738, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1779093742370605, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.002948185196146369, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20497655868530273, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00018144310161005706, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28835105895996, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005168091156519949, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.971784591674805, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01634378731250763, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.308048248291016, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015429461374878883, "pnorm/_forward_module.model.norm.weight": 31.31772232055664, "gnorm/_forward_module.model.norm.weight": 0.0031253325287252665, "pnorm/_forward_module.lm_head.weight": 230.77780151367188, "gnorm/_forward_module.lm_head.weight": 0.02285950817167759} +{"step": 1992294400, "pnorm/_forward_module.model.embeddings.weight": 141.28062438964844, "gnorm/_forward_module.model.embeddings.weight": 0.03691533952951431, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.735363006591797, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0010218878742307425, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.489118576049805, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005533520597964525, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.177452087402344, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005981964059174061, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.730216979980469, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04156123846769333, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.707921981811523, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.0429183728992939, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.08988094329834, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.003625147510319948, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3478550612926483, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0002337155310669914, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.9896183013916, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0005946115124970675, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.98785400390625, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.02489541657269001, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.10481071472168, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.021443499252200127, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.21493911743164, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0005932141211815178, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.256562232971191, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004671309143304825, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.041007995605469, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005307964980602264, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.170026779174805, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.024400318041443825, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.255615234375, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.024956567212939262, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3919053077697754, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004353972151875496, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16434577107429504, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00038962741382420063, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.416120529174805, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006012835074216127, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.746429443359375, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.022670894861221313, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624267578125, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.016824040561914444, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.697389602661133, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0006034219986759126, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.01845932006836, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005383907351642847, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.513777732849121, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.006342597771435976, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657366752624512, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.019772473722696304, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.811516761779785, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.01903282292187214, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.019543170928955, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.003483912907540798, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19200877845287323, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00020665193733293563, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.60200309753418, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005335774621926248, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37383270263672, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02130788564682007, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102598190307617, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.016015876084566116, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.233381271362305, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005566891049966216, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.518847465515137, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.00392846018075943, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.881484985351562, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004835184197872877, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.14169979095459, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.016986915841698647, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456954002380371, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.018747584894299507, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.865299701690674, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0028085445519536734, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16320861876010895, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.0002012563491007313, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.240087509155273, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0008585589239373803, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.796003341674805, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.024222593754529953, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.599185943603516, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.01762247644364834, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.163305282592773, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.001365387230180204, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.607503890991211, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.013698899187147617, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.755261421203613, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.01787969283759594, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.316720962524414, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.026759367436170578, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.972454071044922, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.014111301861703396, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6753835678100586, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0032405098900198936, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15145444869995117, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00018720829393714666, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29191780090332, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.00046834253589622676, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.352161407470703, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.01743607223033905, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127056121826172, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.014688883908092976, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.766857147216797, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.00047966104466468096, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.328662872314453, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004282352514564991, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.620160102844238, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006181922275573015, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.160760879516602, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.012311853468418121, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.255694389343262, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.010091355070471764, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.177948236465454, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0033693797886371613, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.2049739509820938, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00028901922632940114, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28852653503418, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005188643117435277, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.971920013427734, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01627880148589611, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.308177947998047, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015148650854825974, "pnorm/_forward_module.model.norm.weight": 31.319425582885742, "gnorm/_forward_module.model.norm.weight": 0.002887467620894313, "pnorm/_forward_module.lm_head.weight": 230.78306579589844, "gnorm/_forward_module.lm_head.weight": 0.02226090244948864} +{"step": 2013265920, "pnorm/_forward_module.model.embeddings.weight": 141.27993774414062, "gnorm/_forward_module.model.embeddings.weight": 0.03747823089361191, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.735191345214844, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0010185347637161613, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.489347457885742, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005549980327486992, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.17765998840332, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005943977274000645, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.72985553741455, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.042497336864471436, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.707566261291504, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04435905069112778, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0899100303649902, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0036047901958227158, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.34790655970573425, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00010910534911090508, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989688873291016, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006407328182831407, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.987812042236328, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.025843758136034012, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.104753494262695, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.0230922419577837, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.21483039855957, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006017693085595965, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.256625175476074, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.00464570801705122, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.041040420532227, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005252313334494829, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.169785499572754, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.02503127232193947, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.255382537841797, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.026073075830936432, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.391916513442993, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004365789238363504, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16436320543289185, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.0002985032624565065, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.416290283203125, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0006468048668466508, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.74650764465332, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.02373587153851986, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624311447143555, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.01805637590587139, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.697460174560547, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.000792437931522727, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.018672943115234, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.007164910435676575, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.513936996459961, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.009789429605007172, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657228469848633, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.021507184952497482, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.811369895935059, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.019680479541420937, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0195679664611816, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.0035784225910902023, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.192015141248703, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00021380592079367489, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.602100372314453, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005845123087055981, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373828887939453, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.022169306874275208, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.102611541748047, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.01663539744913578, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.23349952697754, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005546119064092636, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.519099235534668, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.004032221622765064, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.881718635559082, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004873660393059254, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.141646385192871, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.017235038802027702, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.4569091796875, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.019076529890298843, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.8653509616851807, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.002921423641964793, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.1632089465856552, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00024164613569155335, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.239944458007812, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.000977479387074709, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.795679092407227, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.026567259803414345, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.59900665283203, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.017953520640730858, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.163557052612305, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0014619502471759915, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.60777473449707, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.014479709789156914, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.75548267364502, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.020168529823422432, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.316859245300293, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.026882417500019073, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.972685813903809, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.014439310878515244, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.675394296646118, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0035303891636431217, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1514485776424408, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.0002780020877253264, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.291885375976562, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.000471546285552904, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35200309753418, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.017672430723905563, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.127010345458984, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.014885174110531807, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.767290115356445, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0005472481134347618, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.328874588012695, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.0042878263629972935, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.620318412780762, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.00602493342012167, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.161267280578613, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.012803500518202782, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.256308555603027, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.010325102135539055, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.1780014038085938, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0032446940895169973, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20497965812683105, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00023066364519763738, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.288665771484375, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005459982203319669, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.972036361694336, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.016636217013001442, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30829620361328, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.015072420239448547, "pnorm/_forward_module.model.norm.weight": 31.32056999206543, "gnorm/_forward_module.model.norm.weight": 0.00280367280356586, "pnorm/_forward_module.lm_head.weight": 230.78652954101562, "gnorm/_forward_module.lm_head.weight": 0.02283090353012085} +{"step": 2034237440, "pnorm/_forward_module.model.embeddings.weight": 141.27951049804688, "gnorm/_forward_module.model.embeddings.weight": 0.035891737788915634, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.73508644104004, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0009865817846730351, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.489511489868164, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005259076599031687, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.177809715270996, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005680103786289692, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.729622840881348, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.041164278984069824, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.7073392868042, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.04215937480330467, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.0899078845977783, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0034073118586093187, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3479275107383728, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.00028215604834258556, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.98969841003418, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006485178018920124, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.987756729125977, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.024631304666399956, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.104698181152344, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.021275196224451065, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.214780807495117, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006411009235307574, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.256712913513184, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004607220180332661, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.041107177734375, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.005223445128649473, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.169647216796875, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.024506401270627975, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.255249977111816, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.024652449414134026, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3919036388397217, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004020797088742256, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16436688601970673, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00019955966854467988, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.4163818359375, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0005308366962708533, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.746540069580078, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.022243579849600792, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624326705932617, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.0165592972189188, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.697532653808594, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0006082403124310076, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.01883316040039, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005449639167636633, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.514059066772461, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.0068230582401156425, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657176971435547, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.020336279645562172, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.811315536499023, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.018596498295664787, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.0195512771606445, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.003400309942662716, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19201135635375977, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00015773254563100636, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.60215950012207, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.0005381385562941432, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.37382698059082, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02075265906751156, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.10262107849121, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.01563592255115509, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.23358154296875, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.0005324099329300225, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.519268035888672, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.0038481152150779963, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.881865501403809, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.004637409467250109, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.141616821289062, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.016603710129857063, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456888198852539, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.017923688516020775, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.865384817123413, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.003037144662812352, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.16320641338825226, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00030970873194746673, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.239877700805664, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0009621047647669911, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.795513153076172, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.023897208273410797, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.598926544189453, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.017309844493865967, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.163728713989258, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0015076800482347608, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.607951164245605, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.014133074320852757, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.75562858581543, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.020300615578889847, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.3169584274292, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.024503719061613083, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.972844123840332, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.013736321590840816, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6754074096679688, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0033214977011084557, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.1514497548341751, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00027573955594561994, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.29184913635254, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.0004525565600488335, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.351886749267578, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.017183246091008186, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.126968383789062, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.0145339360460639, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.767576217651367, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.0004677878168877214, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.329041481018066, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.00419237045571208, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.620448112487793, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.006332274992018938, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.16159439086914, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.011935897171497345, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.256695747375488, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.009929482825100422, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.178053617477417, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0032545761205255985, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20498211681842804, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00024216128804255277, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.288719177246094, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.0005303456564433873, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.972063064575195, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01603449136018753, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.3083438873291, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.014619146473705769, "pnorm/_forward_module.model.norm.weight": 31.321271896362305, "gnorm/_forward_module.model.norm.weight": 0.0029856753535568714, "pnorm/_forward_module.lm_head.weight": 230.78866577148438, "gnorm/_forward_module.lm_head.weight": 0.022005567327141762} +{"step": 2055208960, "pnorm/_forward_module.model.embeddings.weight": 141.27926635742188, "gnorm/_forward_module.model.embeddings.weight": 0.035828955471515656, "pnorm/_forward_module.model.layers.0.attn_norm.weight": 22.735036849975586, "gnorm/_forward_module.model.layers.0.attn_norm.weight": 0.0009216683683916926, "pnorm/_forward_module.model.layers.0.attn.q_proj.weight": 15.489602088928223, "gnorm/_forward_module.model.layers.0.attn.q_proj.weight": 0.005290591157972813, "pnorm/_forward_module.model.layers.0.attn.k_proj.weight": 15.177895545959473, "gnorm/_forward_module.model.layers.0.attn.k_proj.weight": 0.005758250132203102, "pnorm/_forward_module.model.layers.0.attn.v_proj.weight": 10.729506492614746, "gnorm/_forward_module.model.layers.0.attn.v_proj.weight": 0.04101638123393059, "pnorm/_forward_module.model.layers.0.attn.o_proj.weight": 10.707225799560547, "gnorm/_forward_module.model.layers.0.attn.o_proj.weight": 0.041912343353033066, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 2.089909553527832, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.weight": 0.0034380306024104357, "pnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.3479435443878174, "gnorm/_forward_module.model.layers.0.attn.fgate_proj.bias": 0.0001478599151596427, "pnorm/_forward_module.model.layers.0.mlp_norm.weight": 21.989715576171875, "gnorm/_forward_module.model.layers.0.mlp_norm.weight": 0.0006354718352667987, "pnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 26.987735748291016, "gnorm/_forward_module.model.layers.0.mlp.gate_proj.weight": 0.024493178352713585, "pnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 19.10467529296875, "gnorm/_forward_module.model.layers.0.mlp.down_proj.weight": 0.02171187847852707, "pnorm/_forward_module.model.layers.1.attn_norm.weight": 22.214744567871094, "gnorm/_forward_module.model.layers.1.attn_norm.weight": 0.0006052698590792716, "pnorm/_forward_module.model.layers.1.attn.q_proj.weight": 12.25674057006836, "gnorm/_forward_module.model.layers.1.attn.q_proj.weight": 0.004515502601861954, "pnorm/_forward_module.model.layers.1.attn.k_proj.weight": 12.041125297546387, "gnorm/_forward_module.model.layers.1.attn.k_proj.weight": 0.0051620532758533955, "pnorm/_forward_module.model.layers.1.attn.v_proj.weight": 10.169564247131348, "gnorm/_forward_module.model.layers.1.attn.v_proj.weight": 0.02396375499665737, "pnorm/_forward_module.model.layers.1.attn.o_proj.weight": 10.255170822143555, "gnorm/_forward_module.model.layers.1.attn.o_proj.weight": 0.024468552321195602, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 2.3919155597686768, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.weight": 0.004134598653763533, "pnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.16437029838562012, "gnorm/_forward_module.model.layers.1.attn.fgate_proj.bias": 0.00021656013268511742, "pnorm/_forward_module.model.layers.1.mlp_norm.weight": 22.416425704956055, "gnorm/_forward_module.model.layers.1.mlp_norm.weight": 0.0005786814726889133, "pnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 27.746551513671875, "gnorm/_forward_module.model.layers.1.mlp.gate_proj.weight": 0.022026963531970978, "pnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 19.624332427978516, "gnorm/_forward_module.model.layers.1.mlp.down_proj.weight": 0.016732778400182724, "pnorm/_forward_module.model.layers.2.attn_norm.weight": 22.697553634643555, "gnorm/_forward_module.model.layers.2.attn_norm.weight": 0.0005977341788820922, "pnorm/_forward_module.model.layers.2.attn.q_proj.weight": 13.018892288208008, "gnorm/_forward_module.model.layers.2.attn.q_proj.weight": 0.005284285172820091, "pnorm/_forward_module.model.layers.2.attn.k_proj.weight": 12.514102935791016, "gnorm/_forward_module.model.layers.2.attn.k_proj.weight": 0.006200190167874098, "pnorm/_forward_module.model.layers.2.attn.v_proj.weight": 10.657135963439941, "gnorm/_forward_module.model.layers.2.attn.v_proj.weight": 0.02030697837471962, "pnorm/_forward_module.model.layers.2.attn.o_proj.weight": 10.811271667480469, "gnorm/_forward_module.model.layers.2.attn.o_proj.weight": 0.018755359575152397, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 3.019561529159546, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.weight": 0.003117887070402503, "pnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.19201229512691498, "gnorm/_forward_module.model.layers.2.attn.fgate_proj.bias": 0.00017809169366955757, "pnorm/_forward_module.model.layers.2.mlp_norm.weight": 22.6021728515625, "gnorm/_forward_module.model.layers.2.mlp_norm.weight": 0.000598133250605315, "pnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 28.373804092407227, "gnorm/_forward_module.model.layers.2.mlp.gate_proj.weight": 0.02100536786019802, "pnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 20.10261344909668, "gnorm/_forward_module.model.layers.2.mlp.down_proj.weight": 0.015991073101758957, "pnorm/_forward_module.model.layers.3.attn_norm.weight": 23.23363494873047, "gnorm/_forward_module.model.layers.3.attn_norm.weight": 0.00048346578842028975, "pnorm/_forward_module.model.layers.3.attn.q_proj.weight": 14.519370079040527, "gnorm/_forward_module.model.layers.3.attn.q_proj.weight": 0.003839048556983471, "pnorm/_forward_module.model.layers.3.attn.k_proj.weight": 13.881957054138184, "gnorm/_forward_module.model.layers.3.attn.k_proj.weight": 0.00462738424539566, "pnorm/_forward_module.model.layers.3.attn.v_proj.weight": 11.141602516174316, "gnorm/_forward_module.model.layers.3.attn.v_proj.weight": 0.017052920535206795, "pnorm/_forward_module.model.layers.3.attn.o_proj.weight": 11.456875801086426, "gnorm/_forward_module.model.layers.3.attn.o_proj.weight": 0.018348300829529762, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 2.865417242050171, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.weight": 0.0024783355183899403, "pnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.163208469748497, "gnorm/_forward_module.model.layers.3.attn.fgate_proj.bias": 0.00015571669791825116, "pnorm/_forward_module.model.layers.3.mlp_norm.weight": 22.239831924438477, "gnorm/_forward_module.model.layers.3.mlp_norm.weight": 0.0013289490016177297, "pnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 27.7954158782959, "gnorm/_forward_module.model.layers.3.mlp.gate_proj.weight": 0.02637249417603016, "pnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 19.598878860473633, "gnorm/_forward_module.model.layers.3.mlp.down_proj.weight": 0.019196845591068268, "pnorm/_forward_module.model.layers.4.attn_norm.weight": 24.1638240814209, "gnorm/_forward_module.model.layers.4.attn_norm.weight": 0.0019720392301678658, "pnorm/_forward_module.model.layers.4.attn.q_proj.weight": 14.608060836791992, "gnorm/_forward_module.model.layers.4.attn.q_proj.weight": 0.018622737377882004, "pnorm/_forward_module.model.layers.4.attn.k_proj.weight": 13.755719184875488, "gnorm/_forward_module.model.layers.4.attn.k_proj.weight": 0.027003159746527672, "pnorm/_forward_module.model.layers.4.attn.v_proj.weight": 12.317009925842285, "gnorm/_forward_module.model.layers.4.attn.v_proj.weight": 0.024990776553750038, "pnorm/_forward_module.model.layers.4.attn.o_proj.weight": 13.972935676574707, "gnorm/_forward_module.model.layers.4.attn.o_proj.weight": 0.014027920551598072, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 2.6753878593444824, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.weight": 0.0034933581482619047, "pnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.15144643187522888, "gnorm/_forward_module.model.layers.4.attn.fgate_proj.bias": 0.00029174931114539504, "pnorm/_forward_module.model.layers.4.mlp_norm.weight": 23.291828155517578, "gnorm/_forward_module.model.layers.4.mlp_norm.weight": 0.00044949696166440845, "pnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 30.35182762145996, "gnorm/_forward_module.model.layers.4.mlp.gate_proj.weight": 0.01763206347823143, "pnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 21.1269474029541, "gnorm/_forward_module.model.layers.4.mlp.down_proj.weight": 0.014681817963719368, "pnorm/_forward_module.model.layers.5.attn_norm.weight": 23.767732620239258, "gnorm/_forward_module.model.layers.5.attn_norm.weight": 0.00047209148760885, "pnorm/_forward_module.model.layers.5.attn.q_proj.weight": 14.329133033752441, "gnorm/_forward_module.model.layers.5.attn.q_proj.weight": 0.004112453665584326, "pnorm/_forward_module.model.layers.5.attn.k_proj.weight": 13.62052059173584, "gnorm/_forward_module.model.layers.5.attn.k_proj.weight": 0.005898597184568644, "pnorm/_forward_module.model.layers.5.attn.v_proj.weight": 12.161765098571777, "gnorm/_forward_module.model.layers.5.attn.v_proj.weight": 0.012562797404825687, "pnorm/_forward_module.model.layers.5.attn.o_proj.weight": 13.256896018981934, "gnorm/_forward_module.model.layers.5.attn.o_proj.weight": 0.009957646019756794, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 3.178103446960449, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.weight": 0.0027153410483151674, "pnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.20498774945735931, "gnorm/_forward_module.model.layers.5.attn.fgate_proj.bias": 0.00013712180953007191, "pnorm/_forward_module.model.layers.5.mlp_norm.weight": 23.28875732421875, "gnorm/_forward_module.model.layers.5.mlp_norm.weight": 0.000552800833247602, "pnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 30.972087860107422, "gnorm/_forward_module.model.layers.5.mlp.gate_proj.weight": 0.01612411066889763, "pnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 21.30837631225586, "gnorm/_forward_module.model.layers.5.mlp.down_proj.weight": 0.014723340049386024, "pnorm/_forward_module.model.norm.weight": 31.321638107299805, "gnorm/_forward_module.model.norm.weight": 0.0027449321933090687, "pnorm/_forward_module.lm_head.weight": 230.78976440429688, "gnorm/_forward_module.lm_head.weight": 0.022977465763688087}