diff --git "a/model_analysis.json" "b/model_analysis.json" new file mode 100644--- /dev/null +++ "b/model_analysis.json" @@ -0,0 +1,24657 @@ +{ + "layer_types": { + "transformer": 391 + }, + "parameter_counts": { + "transformer.time_embed.time_mlp.0.weight": 262144, + "transformer.time_embed.time_mlp.0.bias": 1024, + "transformer.time_embed.time_mlp.2.weight": 1048576, + "transformer.time_embed.time_mlp.2.bias": 1024, + "transformer.text_embed.text_embed.weight": 254600, + "transformer.input_embed.proj.weight": 307200, + "transformer.input_embed.proj.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, + "transformer.layers.0.1.g": 1024, + "transformer.layers.0.2.to_q.weight": 1048576, + "transformer.layers.0.2.to_q.bias": 1024, + "transformer.layers.0.2.to_k.weight": 1048576, + "transformer.layers.0.2.to_k.bias": 1024, + "transformer.layers.0.2.to_v.weight": 1048576, + "transformer.layers.0.2.to_v.bias": 1024, + "transformer.layers.0.2.to_out.0.weight": 1048576, + "transformer.layers.0.2.to_out.0.bias": 1024, + "transformer.layers.0.3.g": 1024, + "transformer.layers.0.4.ff.0.0.weight": 4194304, + "transformer.layers.0.4.ff.0.0.bias": 4096, + "transformer.layers.0.4.ff.2.weight": 4194304, + "transformer.layers.0.4.ff.2.bias": 1024, + "transformer.layers.1.1.g": 1024, + "transformer.layers.1.2.to_q.weight": 1048576, + "transformer.layers.1.2.to_q.bias": 1024, + "transformer.layers.1.2.to_k.weight": 1048576, + "transformer.layers.1.2.to_k.bias": 1024, + "transformer.layers.1.2.to_v.weight": 1048576, + "transformer.layers.1.2.to_v.bias": 1024, + "transformer.layers.1.2.to_out.0.weight": 1048576, + "transformer.layers.1.2.to_out.0.bias": 1024, + "transformer.layers.1.3.g": 1024, + "transformer.layers.1.4.ff.0.0.weight": 4194304, + "transformer.layers.1.4.ff.0.0.bias": 4096, + "transformer.layers.1.4.ff.2.weight": 4194304, + "transformer.layers.1.4.ff.2.bias": 1024, + "transformer.layers.2.1.g": 1024, + "transformer.layers.2.2.to_q.weight": 1048576, + "transformer.layers.2.2.to_q.bias": 1024, + "transformer.layers.2.2.to_k.weight": 1048576, + "transformer.layers.2.2.to_k.bias": 1024, + "transformer.layers.2.2.to_v.weight": 1048576, + "transformer.layers.2.2.to_v.bias": 1024, + "transformer.layers.2.2.to_out.0.weight": 1048576, + "transformer.layers.2.2.to_out.0.bias": 1024, + "transformer.layers.2.3.g": 1024, + "transformer.layers.2.4.ff.0.0.weight": 4194304, + "transformer.layers.2.4.ff.0.0.bias": 4096, + "transformer.layers.2.4.ff.2.weight": 4194304, + "transformer.layers.2.4.ff.2.bias": 1024, + "transformer.layers.3.1.g": 1024, + "transformer.layers.3.2.to_q.weight": 1048576, + "transformer.layers.3.2.to_q.bias": 1024, + "transformer.layers.3.2.to_k.weight": 1048576, + "transformer.layers.3.2.to_k.bias": 1024, + "transformer.layers.3.2.to_v.weight": 1048576, + "transformer.layers.3.2.to_v.bias": 1024, + "transformer.layers.3.2.to_out.0.weight": 1048576, + "transformer.layers.3.2.to_out.0.bias": 1024, + "transformer.layers.3.3.g": 1024, + "transformer.layers.3.4.ff.0.0.weight": 4194304, + "transformer.layers.3.4.ff.0.0.bias": 4096, + "transformer.layers.3.4.ff.2.weight": 4194304, + "transformer.layers.3.4.ff.2.bias": 1024, + "transformer.layers.4.1.g": 1024, + "transformer.layers.4.2.to_q.weight": 1048576, + "transformer.layers.4.2.to_q.bias": 1024, + "transformer.layers.4.2.to_k.weight": 1048576, + "transformer.layers.4.2.to_k.bias": 1024, + "transformer.layers.4.2.to_v.weight": 1048576, + "transformer.layers.4.2.to_v.bias": 1024, + "transformer.layers.4.2.to_out.0.weight": 1048576, + "transformer.layers.4.2.to_out.0.bias": 1024, + "transformer.layers.4.3.g": 1024, + "transformer.layers.4.4.ff.0.0.weight": 4194304, + "transformer.layers.4.4.ff.0.0.bias": 4096, + "transformer.layers.4.4.ff.2.weight": 4194304, + "transformer.layers.4.4.ff.2.bias": 1024, + "transformer.layers.5.1.g": 1024, + "transformer.layers.5.2.to_q.weight": 1048576, + "transformer.layers.5.2.to_q.bias": 1024, + "transformer.layers.5.2.to_k.weight": 1048576, + "transformer.layers.5.2.to_k.bias": 1024, + "transformer.layers.5.2.to_v.weight": 1048576, + "transformer.layers.5.2.to_v.bias": 1024, + "transformer.layers.5.2.to_out.0.weight": 1048576, + "transformer.layers.5.2.to_out.0.bias": 1024, + "transformer.layers.5.3.g": 1024, + "transformer.layers.5.4.ff.0.0.weight": 4194304, + "transformer.layers.5.4.ff.0.0.bias": 4096, + "transformer.layers.5.4.ff.2.weight": 4194304, + "transformer.layers.5.4.ff.2.bias": 1024, + "transformer.layers.6.1.g": 1024, + "transformer.layers.6.2.to_q.weight": 1048576, + "transformer.layers.6.2.to_q.bias": 1024, + "transformer.layers.6.2.to_k.weight": 1048576, + "transformer.layers.6.2.to_k.bias": 1024, + "transformer.layers.6.2.to_v.weight": 1048576, + "transformer.layers.6.2.to_v.bias": 1024, + "transformer.layers.6.2.to_out.0.weight": 1048576, + "transformer.layers.6.2.to_out.0.bias": 1024, + "transformer.layers.6.3.g": 1024, + "transformer.layers.6.4.ff.0.0.weight": 4194304, + "transformer.layers.6.4.ff.0.0.bias": 4096, + "transformer.layers.6.4.ff.2.weight": 4194304, + "transformer.layers.6.4.ff.2.bias": 1024, + "transformer.layers.7.1.g": 1024, + "transformer.layers.7.2.to_q.weight": 1048576, + "transformer.layers.7.2.to_q.bias": 1024, + "transformer.layers.7.2.to_k.weight": 1048576, + "transformer.layers.7.2.to_k.bias": 1024, + "transformer.layers.7.2.to_v.weight": 1048576, + "transformer.layers.7.2.to_v.bias": 1024, + "transformer.layers.7.2.to_out.0.weight": 1048576, + "transformer.layers.7.2.to_out.0.bias": 1024, + "transformer.layers.7.3.g": 1024, + "transformer.layers.7.4.ff.0.0.weight": 4194304, + "transformer.layers.7.4.ff.0.0.bias": 4096, + "transformer.layers.7.4.ff.2.weight": 4194304, + "transformer.layers.7.4.ff.2.bias": 1024, + "transformer.layers.8.1.g": 1024, + "transformer.layers.8.2.to_q.weight": 1048576, + "transformer.layers.8.2.to_q.bias": 1024, + "transformer.layers.8.2.to_k.weight": 1048576, + "transformer.layers.8.2.to_k.bias": 1024, + "transformer.layers.8.2.to_v.weight": 1048576, + "transformer.layers.8.2.to_v.bias": 1024, + "transformer.layers.8.2.to_out.0.weight": 1048576, + "transformer.layers.8.2.to_out.0.bias": 1024, + "transformer.layers.8.3.g": 1024, + "transformer.layers.8.4.ff.0.0.weight": 4194304, + "transformer.layers.8.4.ff.0.0.bias": 4096, + "transformer.layers.8.4.ff.2.weight": 4194304, + "transformer.layers.8.4.ff.2.bias": 1024, + "transformer.layers.9.1.g": 1024, + "transformer.layers.9.2.to_q.weight": 1048576, + "transformer.layers.9.2.to_q.bias": 1024, + "transformer.layers.9.2.to_k.weight": 1048576, + "transformer.layers.9.2.to_k.bias": 1024, + "transformer.layers.9.2.to_v.weight": 1048576, + "transformer.layers.9.2.to_v.bias": 1024, + "transformer.layers.9.2.to_out.0.weight": 1048576, + "transformer.layers.9.2.to_out.0.bias": 1024, + "transformer.layers.9.3.g": 1024, + "transformer.layers.9.4.ff.0.0.weight": 4194304, + "transformer.layers.9.4.ff.0.0.bias": 4096, + "transformer.layers.9.4.ff.2.weight": 4194304, + "transformer.layers.9.4.ff.2.bias": 1024, + "transformer.layers.10.1.g": 1024, + "transformer.layers.10.2.to_q.weight": 1048576, + "transformer.layers.10.2.to_q.bias": 1024, + "transformer.layers.10.2.to_k.weight": 1048576, + "transformer.layers.10.2.to_k.bias": 1024, + "transformer.layers.10.2.to_v.weight": 1048576, + "transformer.layers.10.2.to_v.bias": 1024, + "transformer.layers.10.2.to_out.0.weight": 1048576, + "transformer.layers.10.2.to_out.0.bias": 1024, + "transformer.layers.10.3.g": 1024, + "transformer.layers.10.4.ff.0.0.weight": 4194304, + "transformer.layers.10.4.ff.0.0.bias": 4096, + "transformer.layers.10.4.ff.2.weight": 4194304, + "transformer.layers.10.4.ff.2.bias": 1024, + "transformer.layers.11.1.g": 1024, + "transformer.layers.11.2.to_q.weight": 1048576, + "transformer.layers.11.2.to_q.bias": 1024, + "transformer.layers.11.2.to_k.weight": 1048576, + "transformer.layers.11.2.to_k.bias": 1024, + "transformer.layers.11.2.to_v.weight": 1048576, + "transformer.layers.11.2.to_v.bias": 1024, + "transformer.layers.11.2.to_out.0.weight": 1048576, + "transformer.layers.11.2.to_out.0.bias": 1024, + "transformer.layers.11.3.g": 1024, + "transformer.layers.11.4.ff.0.0.weight": 4194304, + "transformer.layers.11.4.ff.0.0.bias": 4096, + "transformer.layers.11.4.ff.2.weight": 4194304, + "transformer.layers.11.4.ff.2.bias": 1024, + "transformer.layers.12.1.g": 1024, + "transformer.layers.12.2.to_q.weight": 1048576, + "transformer.layers.12.2.to_q.bias": 1024, + "transformer.layers.12.2.to_k.weight": 1048576, + "transformer.layers.12.2.to_k.bias": 1024, + "transformer.layers.12.2.to_v.weight": 1048576, + "transformer.layers.12.2.to_v.bias": 1024, + "transformer.layers.12.2.to_out.0.weight": 1048576, + "transformer.layers.12.2.to_out.0.bias": 1024, + "transformer.layers.12.3.g": 1024, + "transformer.layers.12.4.ff.0.0.weight": 4194304, + "transformer.layers.12.4.ff.0.0.bias": 4096, + "transformer.layers.12.4.ff.2.weight": 4194304, + "transformer.layers.12.4.ff.2.bias": 1024, + "transformer.layers.13.0.weight": 2097152, + "transformer.layers.13.1.g": 1024, + "transformer.layers.13.2.to_q.weight": 1048576, + "transformer.layers.13.2.to_q.bias": 1024, + "transformer.layers.13.2.to_k.weight": 1048576, + "transformer.layers.13.2.to_k.bias": 1024, + "transformer.layers.13.2.to_v.weight": 1048576, + "transformer.layers.13.2.to_v.bias": 1024, + "transformer.layers.13.2.to_out.0.weight": 1048576, + "transformer.layers.13.2.to_out.0.bias": 1024, + "transformer.layers.13.3.g": 1024, + "transformer.layers.13.4.ff.0.0.weight": 4194304, + "transformer.layers.13.4.ff.0.0.bias": 4096, + "transformer.layers.13.4.ff.2.weight": 4194304, + "transformer.layers.13.4.ff.2.bias": 1024, + "transformer.layers.14.0.weight": 2097152, + "transformer.layers.14.1.g": 1024, + "transformer.layers.14.2.to_q.weight": 1048576, + "transformer.layers.14.2.to_q.bias": 1024, + "transformer.layers.14.2.to_k.weight": 1048576, + "transformer.layers.14.2.to_k.bias": 1024, + "transformer.layers.14.2.to_v.weight": 1048576, + "transformer.layers.14.2.to_v.bias": 1024, + "transformer.layers.14.2.to_out.0.weight": 1048576, + "transformer.layers.14.2.to_out.0.bias": 1024, + "transformer.layers.14.3.g": 1024, + "transformer.layers.14.4.ff.0.0.weight": 4194304, + "transformer.layers.14.4.ff.0.0.bias": 4096, + "transformer.layers.14.4.ff.2.weight": 4194304, + "transformer.layers.14.4.ff.2.bias": 1024, + "transformer.layers.15.0.weight": 2097152, + "transformer.layers.15.1.g": 1024, + "transformer.layers.15.2.to_q.weight": 1048576, + "transformer.layers.15.2.to_q.bias": 1024, + "transformer.layers.15.2.to_k.weight": 1048576, + "transformer.layers.15.2.to_k.bias": 1024, + "transformer.layers.15.2.to_v.weight": 1048576, + "transformer.layers.15.2.to_v.bias": 1024, + "transformer.layers.15.2.to_out.0.weight": 1048576, + "transformer.layers.15.2.to_out.0.bias": 1024, + "transformer.layers.15.3.g": 1024, + "transformer.layers.15.4.ff.0.0.weight": 4194304, + "transformer.layers.15.4.ff.0.0.bias": 4096, + "transformer.layers.15.4.ff.2.weight": 4194304, + "transformer.layers.15.4.ff.2.bias": 1024, + "transformer.layers.16.0.weight": 2097152, + "transformer.layers.16.1.g": 1024, + "transformer.layers.16.2.to_q.weight": 1048576, + "transformer.layers.16.2.to_q.bias": 1024, + "transformer.layers.16.2.to_k.weight": 1048576, + "transformer.layers.16.2.to_k.bias": 1024, + "transformer.layers.16.2.to_v.weight": 1048576, + "transformer.layers.16.2.to_v.bias": 1024, + "transformer.layers.16.2.to_out.0.weight": 1048576, + "transformer.layers.16.2.to_out.0.bias": 1024, + "transformer.layers.16.3.g": 1024, + "transformer.layers.16.4.ff.0.0.weight": 4194304, + "transformer.layers.16.4.ff.0.0.bias": 4096, + "transformer.layers.16.4.ff.2.weight": 4194304, + "transformer.layers.16.4.ff.2.bias": 1024, + "transformer.layers.17.0.weight": 2097152, + "transformer.layers.17.1.g": 1024, + "transformer.layers.17.2.to_q.weight": 1048576, + "transformer.layers.17.2.to_q.bias": 1024, + "transformer.layers.17.2.to_k.weight": 1048576, + "transformer.layers.17.2.to_k.bias": 1024, + "transformer.layers.17.2.to_v.weight": 1048576, + "transformer.layers.17.2.to_v.bias": 1024, + "transformer.layers.17.2.to_out.0.weight": 1048576, + "transformer.layers.17.2.to_out.0.bias": 1024, + "transformer.layers.17.3.g": 1024, + "transformer.layers.17.4.ff.0.0.weight": 4194304, + "transformer.layers.17.4.ff.0.0.bias": 4096, + "transformer.layers.17.4.ff.2.weight": 4194304, + "transformer.layers.17.4.ff.2.bias": 1024, + "transformer.layers.18.0.weight": 2097152, + "transformer.layers.18.1.g": 1024, + "transformer.layers.18.2.to_q.weight": 1048576, + "transformer.layers.18.2.to_q.bias": 1024, + "transformer.layers.18.2.to_k.weight": 1048576, + "transformer.layers.18.2.to_k.bias": 1024, + "transformer.layers.18.2.to_v.weight": 1048576, + "transformer.layers.18.2.to_v.bias": 1024, + "transformer.layers.18.2.to_out.0.weight": 1048576, + "transformer.layers.18.2.to_out.0.bias": 1024, + "transformer.layers.18.3.g": 1024, + "transformer.layers.18.4.ff.0.0.weight": 4194304, + "transformer.layers.18.4.ff.0.0.bias": 4096, + "transformer.layers.18.4.ff.2.weight": 4194304, + "transformer.layers.18.4.ff.2.bias": 1024, + "transformer.layers.19.0.weight": 2097152, + "transformer.layers.19.1.g": 1024, + "transformer.layers.19.2.to_q.weight": 1048576, + "transformer.layers.19.2.to_q.bias": 1024, + "transformer.layers.19.2.to_k.weight": 1048576, + "transformer.layers.19.2.to_k.bias": 1024, + "transformer.layers.19.2.to_v.weight": 1048576, + "transformer.layers.19.2.to_v.bias": 1024, + "transformer.layers.19.2.to_out.0.weight": 1048576, + "transformer.layers.19.2.to_out.0.bias": 1024, + "transformer.layers.19.3.g": 1024, + "transformer.layers.19.4.ff.0.0.weight": 4194304, + "transformer.layers.19.4.ff.0.0.bias": 4096, + "transformer.layers.19.4.ff.2.weight": 4194304, + "transformer.layers.19.4.ff.2.bias": 1024, + "transformer.layers.20.0.weight": 2097152, + "transformer.layers.20.1.g": 1024, + "transformer.layers.20.2.to_q.weight": 1048576, + "transformer.layers.20.2.to_q.bias": 1024, + "transformer.layers.20.2.to_k.weight": 1048576, + "transformer.layers.20.2.to_k.bias": 1024, + "transformer.layers.20.2.to_v.weight": 1048576, + "transformer.layers.20.2.to_v.bias": 1024, + "transformer.layers.20.2.to_out.0.weight": 1048576, + "transformer.layers.20.2.to_out.0.bias": 1024, + "transformer.layers.20.3.g": 1024, + "transformer.layers.20.4.ff.0.0.weight": 4194304, + "transformer.layers.20.4.ff.0.0.bias": 4096, + "transformer.layers.20.4.ff.2.weight": 4194304, + "transformer.layers.20.4.ff.2.bias": 1024, + "transformer.layers.21.0.weight": 2097152, + "transformer.layers.21.1.g": 1024, + "transformer.layers.21.2.to_q.weight": 1048576, + "transformer.layers.21.2.to_q.bias": 1024, + "transformer.layers.21.2.to_k.weight": 1048576, + "transformer.layers.21.2.to_k.bias": 1024, + "transformer.layers.21.2.to_v.weight": 1048576, + "transformer.layers.21.2.to_v.bias": 1024, + "transformer.layers.21.2.to_out.0.weight": 1048576, + "transformer.layers.21.2.to_out.0.bias": 1024, + "transformer.layers.21.3.g": 1024, + "transformer.layers.21.4.ff.0.0.weight": 4194304, + "transformer.layers.21.4.ff.0.0.bias": 4096, + "transformer.layers.21.4.ff.2.weight": 4194304, + "transformer.layers.21.4.ff.2.bias": 1024, + "transformer.layers.22.0.weight": 2097152, + "transformer.layers.22.1.g": 1024, + "transformer.layers.22.2.to_q.weight": 1048576, + "transformer.layers.22.2.to_q.bias": 1024, + "transformer.layers.22.2.to_k.weight": 1048576, + "transformer.layers.22.2.to_k.bias": 1024, + "transformer.layers.22.2.to_v.weight": 1048576, + "transformer.layers.22.2.to_v.bias": 1024, + "transformer.layers.22.2.to_out.0.weight": 1048576, + "transformer.layers.22.2.to_out.0.bias": 1024, + "transformer.layers.22.3.g": 1024, + "transformer.layers.22.4.ff.0.0.weight": 4194304, + "transformer.layers.22.4.ff.0.0.bias": 4096, + "transformer.layers.22.4.ff.2.weight": 4194304, + "transformer.layers.22.4.ff.2.bias": 1024, + "transformer.layers.23.0.weight": 2097152, + "transformer.layers.23.1.g": 1024, + "transformer.layers.23.2.to_q.weight": 1048576, + "transformer.layers.23.2.to_q.bias": 1024, + "transformer.layers.23.2.to_k.weight": 1048576, + "transformer.layers.23.2.to_k.bias": 1024, + "transformer.layers.23.2.to_v.weight": 1048576, + "transformer.layers.23.2.to_v.bias": 1024, + "transformer.layers.23.2.to_out.0.weight": 1048576, + "transformer.layers.23.2.to_out.0.bias": 1024, + "transformer.layers.23.3.g": 1024, + "transformer.layers.23.4.ff.0.0.weight": 4194304, + "transformer.layers.23.4.ff.0.0.bias": 4096, + "transformer.layers.23.4.ff.2.weight": 4194304, + "transformer.layers.23.4.ff.2.bias": 1024, + "transformer.layers.24.0.weight": 2097152, + "transformer.layers.24.1.g": 1024, + "transformer.layers.24.2.to_q.weight": 1048576, + "transformer.layers.24.2.to_q.bias": 1024, + "transformer.layers.24.2.to_k.weight": 1048576, + "transformer.layers.24.2.to_k.bias": 1024, + "transformer.layers.24.2.to_v.weight": 1048576, + "transformer.layers.24.2.to_v.bias": 1024, + "transformer.layers.24.2.to_out.0.weight": 1048576, + "transformer.layers.24.2.to_out.0.bias": 1024, + "transformer.layers.24.3.g": 1024, + "transformer.layers.24.4.ff.0.0.weight": 4194304, + "transformer.layers.24.4.ff.0.0.bias": 4096, + "transformer.layers.24.4.ff.2.weight": 4194304, + "transformer.layers.24.4.ff.2.bias": 1024, + "transformer.layers.25.0.weight": 2097152, + "transformer.layers.25.1.g": 1024, + "transformer.layers.25.2.to_q.weight": 1048576, + "transformer.layers.25.2.to_q.bias": 1024, + "transformer.layers.25.2.to_k.weight": 1048576, + "transformer.layers.25.2.to_k.bias": 1024, + "transformer.layers.25.2.to_v.weight": 1048576, + "transformer.layers.25.2.to_v.bias": 1024, + "transformer.layers.25.2.to_out.0.weight": 1048576, + "transformer.layers.25.2.to_out.0.bias": 1024, + "transformer.layers.25.3.g": 1024, + "transformer.layers.25.4.ff.0.0.weight": 4194304, + "transformer.layers.25.4.ff.0.0.bias": 4096, + "transformer.layers.25.4.ff.2.weight": 4194304, + "transformer.layers.25.4.ff.2.bias": 1024, + "transformer.norm_out.g": 1024, + "transformer.proj_out.weight": 102400, + "transformer.proj_out.bias": 100 + }, + "important_layers": [ + "transformer.time_embed.time_mlp.0.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.text_embed.text_embed.weight", + "transformer.input_embed.proj.weight", + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight", + "transformer.layers.1.2.to_v.weight", + "transformer.layers.1.2.to_out.0.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.2.to_q.weight", + "transformer.layers.2.2.to_k.weight", + "transformer.layers.2.2.to_v.weight", + "transformer.layers.2.2.to_out.0.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.2.to_q.weight", + "transformer.layers.3.2.to_k.weight", + "transformer.layers.3.2.to_v.weight", + "transformer.layers.3.2.to_out.0.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.2.to_q.weight", + "transformer.layers.4.2.to_k.weight", + "transformer.layers.4.2.to_v.weight", + "transformer.layers.4.2.to_out.0.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.2.to_q.weight", + "transformer.layers.5.2.to_k.weight", + "transformer.layers.5.2.to_v.weight", + "transformer.layers.5.2.to_out.0.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.2.to_q.weight", + "transformer.layers.6.2.to_k.weight", + "transformer.layers.6.2.to_v.weight", + "transformer.layers.6.2.to_out.0.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.2.to_q.weight", + "transformer.layers.7.2.to_k.weight", + "transformer.layers.7.2.to_v.weight", + "transformer.layers.7.2.to_out.0.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.11.4.ff.2.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.0.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.14.4.ff.2.weight", + "transformer.layers.15.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight" + ], + "bottleneck_layers": [], + "attention_layers": [], + "projection_layers": [ + "transformer.input_embed.proj.weight", + "transformer.input_embed.proj.bias", + "transformer.proj_out.weight", + "transformer.proj_out.bias" + ], + "recommendations": { + "critical_layers": { + "layers": [ + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.time_embed.time_mlp.0.weight", + "transformer.text_embed.text_embed.weight", + "transformer.time_embed.time_mlp.0.bias", + "transformer.time_embed.time_mlp.2.bias", + "transformer.input_embed.proj.weight", + "transformer.proj_out.weight", + "transformer.input_embed.proj.bias", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.15.0.weight", + "transformer.layers.16.0.weight", + "transformer.layers.17.0.weight", + "transformer.layers.18.0.weight", + "transformer.layers.19.0.weight", + "transformer.layers.20.0.weight", + "transformer.layers.21.0.weight", + "transformer.layers.22.0.weight", + "transformer.layers.23.0.weight", + "transformer.layers.24.0.weight", + "transformer.layers.25.0.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight" + ], + "strategy": "targeted_enhancement", + "enhancement_factor": 1.3 + } + }, + "layer_connectivity": {}, + "parameter_statistics": {}, + "high_importance_layers": [ + "transformer.input_embed.conv_pos_embed.conv1d.0.weight", + "transformer.input_embed.conv_pos_embed.conv1d.2.weight", + "transformer.time_embed.time_mlp.2.weight", + "transformer.time_embed.time_mlp.0.weight", + "transformer.text_embed.text_embed.weight", + "transformer.time_embed.time_mlp.0.bias", + "transformer.time_embed.time_mlp.2.bias", + "transformer.input_embed.proj.weight", + "transformer.proj_out.weight", + "transformer.input_embed.proj.bias", + "transformer.layers.0.4.ff.0.0.weight", + "transformer.layers.0.4.ff.2.weight", + "transformer.layers.1.4.ff.0.0.weight", + "transformer.layers.1.4.ff.2.weight", + "transformer.layers.2.4.ff.0.0.weight", + "transformer.layers.2.4.ff.2.weight", + "transformer.layers.3.4.ff.0.0.weight", + "transformer.layers.3.4.ff.2.weight", + "transformer.layers.4.4.ff.0.0.weight", + "transformer.layers.4.4.ff.2.weight", + "transformer.layers.5.4.ff.0.0.weight", + "transformer.layers.5.4.ff.2.weight", + "transformer.layers.6.4.ff.0.0.weight", + "transformer.layers.6.4.ff.2.weight", + "transformer.layers.7.4.ff.0.0.weight", + "transformer.layers.7.4.ff.2.weight", + "transformer.layers.8.4.ff.0.0.weight", + "transformer.layers.8.4.ff.2.weight", + "transformer.layers.9.4.ff.0.0.weight", + "transformer.layers.9.4.ff.2.weight", + "transformer.layers.10.4.ff.0.0.weight", + "transformer.layers.10.4.ff.2.weight", + "transformer.layers.11.4.ff.0.0.weight", + "transformer.layers.12.4.ff.0.0.weight", + "transformer.layers.12.4.ff.2.weight", + "transformer.layers.13.4.ff.0.0.weight", + "transformer.layers.13.4.ff.2.weight", + "transformer.layers.14.4.ff.0.0.weight", + "transformer.layers.15.4.ff.0.0.weight", + "transformer.layers.15.4.ff.2.weight", + "transformer.layers.16.4.ff.0.0.weight", + "transformer.layers.16.4.ff.2.weight", + "transformer.layers.17.4.ff.0.0.weight", + "transformer.layers.17.4.ff.2.weight", + "transformer.layers.18.4.ff.0.0.weight", + "transformer.layers.18.4.ff.2.weight", + "transformer.layers.19.4.ff.0.0.weight", + "transformer.layers.19.4.ff.2.weight", + "transformer.layers.20.4.ff.0.0.weight", + "transformer.layers.20.4.ff.2.weight", + "transformer.layers.21.4.ff.0.0.weight", + "transformer.layers.21.4.ff.2.weight", + "transformer.layers.22.4.ff.0.0.weight", + "transformer.layers.22.4.ff.2.weight", + "transformer.layers.23.4.ff.0.0.weight", + "transformer.layers.23.4.ff.2.weight", + "transformer.layers.24.4.ff.0.0.weight", + "transformer.layers.24.4.ff.2.weight", + "transformer.layers.25.4.ff.0.0.weight", + "transformer.layers.25.4.ff.2.weight", + "transformer.layers.13.0.weight", + "transformer.layers.15.0.weight", + "transformer.layers.16.0.weight", + "transformer.layers.17.0.weight", + "transformer.layers.18.0.weight", + "transformer.layers.19.0.weight", + "transformer.layers.20.0.weight", + "transformer.layers.21.0.weight", + "transformer.layers.22.0.weight", + "transformer.layers.23.0.weight", + "transformer.layers.24.0.weight", + "transformer.layers.25.0.weight", + "transformer.layers.0.2.to_q.weight", + "transformer.layers.0.2.to_k.weight", + "transformer.layers.0.2.to_v.weight", + "transformer.layers.0.2.to_out.0.weight", + "transformer.layers.1.2.to_q.weight", + "transformer.layers.1.2.to_k.weight" + ], + "total_parameters": 391, + "total_elements": 360755948, + "param_ranges": { + "transformer.time_embed.time_mlp.0.weight": { + "min": -0.43014463782310486, + "max": 0.2980782687664032, + "mean": -0.002543725073337555, + "std": 0.04256265610456467, + "abs_mean": 0.03249503672122955, + "sparsity": 0.0, + "shape": [ + 1024, + 256 + ], + "norm": 21.830894470214844, + "elements": 262144, + "histogram": { + "counts": [ + 5, + 10, + 23, + 43, + 101, + 144, + 198, + 176, + 132, + 71, + 49, + 23, + 8, + 9, + 3, + 2, + 1, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.1300935000181198, + -0.11207325011491776, + -0.0940530002117157, + -0.07603274285793304, + -0.05801249295473099, + -0.03999224305152893, + -0.021971985697746277, + -0.003951743245124817, + 0.014068514108657837, + 0.03208877146244049, + 0.05010901391506195, + 0.0681292712688446, + 0.08614952862262726, + 0.10416977107524872, + 0.12219001352787018, + 0.14021028578281403, + 0.15823052823543549, + 0.17625077068805695, + 0.1942710429430008, + 0.21229128539562225, + 0.23031151294708252 + ] + } + }, + "transformer.time_embed.time_mlp.0.bias": { + "min": -0.0628998726606369, + "max": 0.1072736531496048, + "mean": 0.0006290247547440231, + "std": 0.034041259437799454, + "abs_mean": 0.027421049773693085, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.0889742374420166, + "elements": 1024, + "histogram": { + "counts": [ + 26, + 60, + 59, + 79, + 70, + 83, + 106, + 99, + 92, + 89, + 51, + 40, + 47, + 36, + 21, + 13, + 17, + 7, + 2, + 3 + ], + "bin_edges": [ + -0.0628998726606369, + -0.054391197860240936, + -0.04588251933455467, + -0.03737384080886841, + -0.028865166008472443, + -0.020356491208076477, + -0.011847812682390213, + -0.003339134156703949, + 0.005169540643692017, + 0.013678215444087982, + 0.022186890244483948, + 0.03069557249546051, + 0.039204247295856476, + 0.04771292209625244, + 0.056221604347229004, + 0.06473027169704437, + 0.07323895394802094, + 0.0817476361989975, + 0.09025630354881287, + 0.09876498579978943, + 0.1072736531496048 + ] + } + }, + "transformer.time_embed.time_mlp.2.weight": { + "min": -0.41270536184310913, + "max": 0.8369129300117493, + "mean": -0.00020170127390883863, + "std": 0.024111710488796234, + "abs_mean": 0.01558289397507906, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 24.69097328186035, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 7, + 7, + 2, + 25, + 63, + 149, + 224, + 209, + 172, + 82, + 35, + 10, + 4, + 3, + 2, + 0, + 0, + 1, + 2 + ], + "bin_edges": [ + -0.08513874560594559, + -0.07475044578313828, + -0.06436215341091156, + -0.05397385358810425, + -0.043585557490587234, + -0.03319726139307022, + -0.02280896157026291, + -0.012420669198036194, + -0.002032369375228882, + 0.00835593044757843, + 0.018744222819805145, + 0.029132522642612457, + 0.03952082246541977, + 0.04990912228822708, + 0.0602974072098732, + 0.07068570703268051, + 0.08107400685548782, + 0.09146230667829514, + 0.10185060650110245, + 0.11223889142274857, + 0.12262718379497528 + ] + } + }, + "transformer.time_embed.time_mlp.2.bias": { + "min": -0.11501855403184891, + "max": 0.3208469748497009, + "mean": -0.0009418133413419127, + "std": 0.019536493346095085, + "abs_mean": 0.01235988549888134, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.6255888938903809, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 3, + 13, + 370, + 489, + 115, + 3, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.11501855403184891, + -0.09322527796030045, + -0.07143200188875198, + -0.04963872581720352, + -0.02784544974565506, + -0.006052173674106598, + 0.015741102397441864, + 0.037534378468990326, + 0.05932765454053879, + 0.08112093061208725, + 0.10291420668363571, + 0.12470748275518417, + 0.14650076627731323, + 0.1682940423488617, + 0.19008731842041016, + 0.21188059449195862, + 0.23367387056350708, + 0.25546714663505554, + 0.277260422706604, + 0.29905369877815247, + 0.3208469748497009 + ] + } + }, + "transformer.text_embed.text_embed.weight": { + "min": -2.7886247634887695, + "max": 2.8676700592041016, + "mean": -0.0003673351602628827, + "std": 0.6154847145080566, + "abs_mean": 0.4908738136291504, + "sparsity": 0.0, + "shape": [ + 2546, + 100 + ], + "norm": 310.559814453125, + "elements": 254600, + "histogram": { + "counts": [ + 2, + 0, + 5, + 18, + 33, + 67, + 90, + 126, + 130, + 175, + 125, + 93, + 50, + 46, + 21, + 10, + 6, + 2, + 0, + 1 + ], + "bin_edges": [ + -2.093568801879883, + -1.8628169298171997, + -1.6320650577545166, + -1.401313304901123, + -1.17056143283844, + -0.9398095607757568, + -0.7090578079223633, + -0.4783059358596802, + -0.24755406379699707, + -0.016802310943603516, + 0.21394968032836914, + 0.4447014331817627, + 0.6754531860351562, + 0.9062051773071289, + 1.1369569301605225, + 1.3677089214324951, + 1.5984606742858887, + 1.8292124271392822, + 2.059964179992676, + 2.2907161712646484, + 2.5214684009552 + ] + } + }, + "transformer.input_embed.proj.weight": { + "min": -0.27889013290405273, + "max": 0.38151732087135315, + "mean": 0.0004236791573930532, + "std": 0.04274853691458702, + "abs_mean": 0.032939568161964417, + "sparsity": 0.0, + "shape": [ + 1024, + 300 + ], + "norm": 23.69471549987793, + "elements": 307200, + "histogram": { + "counts": [ + 2, + 1, + 4, + 3, + 9, + 17, + 37, + 59, + 96, + 122, + 139, + 149, + 149, + 83, + 59, + 36, + 22, + 8, + 3, + 2 + ], + "bin_edges": [ + -0.1740008443593979, + -0.15822970867156982, + -0.14245855808258057, + -0.1266874223947525, + -0.11091627925634384, + -0.09514513611793518, + -0.07937400043010712, + -0.06360285729169846, + -0.047831714153289795, + -0.03206057846546173, + -0.016289427876472473, + -0.0005182921886444092, + 0.015252843499183655, + 0.031023994088172913, + 0.04679512977600098, + 0.06256628036499023, + 0.0783374160528183, + 0.09410856664180756, + 0.10987968742847443, + 0.12565083801746368, + 0.14142198860645294 + ] + } + }, + "transformer.input_embed.proj.bias": { + "min": -0.2219879925251007, + "max": 0.2091645449399948, + "mean": -0.004480332136154175, + "std": 0.040872007608413696, + "abs_mean": 0.03087138757109642, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.3151038885116577, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 0, + 5, + 2, + 12, + 19, + 65, + 158, + 197, + 228, + 197, + 81, + 17, + 8, + 1, + 3, + 1, + 2, + 1 + ], + "bin_edges": [ + -0.2219879925251007, + -0.20043036341667175, + -0.1788727343082428, + -0.15731512010097504, + -0.13575749099254608, + -0.11419986188411713, + -0.09264224767684937, + -0.07108461856842041, + -0.049526989459991455, + -0.0279693603515625, + -0.006411731243133545, + 0.015145882964134216, + 0.03670349717140198, + 0.05826112627983093, + 0.07981875538825989, + 0.10137638449668884, + 0.1229340136051178, + 0.14449164271354675, + 0.1660492718219757, + 0.18760690093040466, + 0.2091645449399948 + ] + } + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { + "min": -0.42831405997276306, + "max": 0.47610175609588623, + "mean": 3.7659003737644525e-06, + "std": 0.024510981515049934, + "abs_mean": 0.018337251618504524, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ], + "norm": 34.935028076171875, + "elements": 2031616, + "histogram": { + "counts": [ + 2, + 2, + 6, + 8, + 19, + 34, + 59, + 107, + 163, + 177, + 135, + 120, + 73, + 45, + 27, + 12, + 6, + 2, + 1, + 2 + ], + "bin_edges": [ + -0.08790014684200287, + -0.07873495668172836, + -0.06956977397203445, + -0.06040458381175995, + -0.05123939737677574, + -0.042074210941791534, + -0.03290902078151703, + -0.02374383807182312, + -0.014578647911548615, + -0.005413457751274109, + 0.0037517249584198, + 0.012916915118694305, + 0.02208210527896881, + 0.03124728798866272, + 0.04041247069835663, + 0.04957766830921173, + 0.05874285101890564, + 0.06790803372859955, + 0.07707323133945465, + 0.08623841404914856, + 0.09540360420942307 + ] + } + }, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { + "min": -0.3244315981864929, + "max": 0.15647757053375244, + "mean": -0.046661682426929474, + "std": 0.05150889977812767, + "abs_mean": 0.054327093064785004, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.223456382751465, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 0, + 1, + 1, + 1, + 13, + 28, + 79, + 113, + 144, + 147, + 208, + 162, + 73, + 18, + 4, + 3, + 1, + 2 + ], + "bin_edges": [ + -0.3244315981864929, + -0.30038613080978394, + -0.27634069323539734, + -0.25229522585868835, + -0.22824975848197937, + -0.20420430600643158, + -0.1801588535308838, + -0.1561133861541748, + -0.13206793367862701, + -0.10802248120307922, + -0.08397701382637024, + -0.059931546449661255, + -0.03588610887527466, + -0.011840641498565674, + 0.01220482587814331, + 0.03625026345252991, + 0.06029573082923889, + 0.08434119820594788, + 0.10838663578033447, + 0.13243210315704346, + 0.15647757053375244 + ] + } + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { + "min": -0.4104415476322174, + "max": 0.3546721041202545, + "mean": -0.00013054230657871813, + "std": 0.02360478602349758, + "abs_mean": 0.017416419461369514, + "sparsity": 0.0, + "shape": [ + 1024, + 64, + 31 + ], + "norm": 33.64410400390625, + "elements": 2031616, + "histogram": { + "counts": [ + 2, + 2, + 2, + 11, + 14, + 36, + 81, + 138, + 248, + 214, + 123, + 75, + 29, + 14, + 6, + 2, + 1, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.10315045714378357, + -0.09150389581918716, + -0.07985734194517136, + -0.06821078062057495, + -0.056564223021268845, + -0.04491766542196274, + -0.03327110409736633, + -0.021624550223350525, + -0.00997798889875412, + 0.0016685724258422852, + 0.013315126299858093, + 0.0249616801738739, + 0.0366082489490509, + 0.04825480282306671, + 0.05990135669708252, + 0.07154792547225952, + 0.08319447934627533, + 0.09484103322029114, + 0.10648760199546814, + 0.11813415586948395, + 0.12978070974349976 + ] + } + }, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { + "min": -0.22924789786338806, + "max": 0.2620227038860321, + "mean": -0.029105938971042633, + "std": 0.04928705468773842, + "abs_mean": 0.042650409042835236, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.8310034275054932, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 4, + 9, + 14, + 35, + 76, + 106, + 172, + 216, + 218, + 100, + 36, + 7, + 2, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.22924789786338806, + -0.20468436181545258, + -0.18012084066867828, + -0.1555573046207428, + -0.1309937834739685, + -0.10643024742603302, + -0.08186671137809753, + -0.05730319023132324, + -0.032739654183387756, + -0.00817611813545227, + 0.01638740301132202, + 0.040950924158096313, + 0.065514475107193, + 0.09007799625396729, + 0.11464151740074158, + 0.13920506834983826, + 0.16376858949661255, + 0.18833211064338684, + 0.21289566159248352, + 0.2374591827392578, + 0.2620227038860321 + ] + } + }, + "transformer.layers.0.1.g": { + "min": 0.2546031177043915, + "max": 0.8185229301452637, + "mean": 0.5252923965454102, + "std": 0.08049347996711731, + "abs_mean": 0.5252923965454102, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 17.005373001098633, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 5, + 14, + 21, + 66, + 66, + 100, + 146, + 167, + 140, + 83, + 75, + 53, + 31, + 13, + 5, + 6, + 2, + 4 + ], + "bin_edges": [ + 0.2546031177043915, + 0.28279909491539, + 0.31099510192871094, + 0.3391910791397095, + 0.367387056350708, + 0.39558306336402893, + 0.42377904057502747, + 0.4519750475883484, + 0.4801710247993469, + 0.5083670020103455, + 0.5365630388259888, + 0.5647590160369873, + 0.5929549932479858, + 0.6211509704589844, + 0.6493469476699829, + 0.6775429248809814, + 0.70573890209198, + 0.7339348793029785, + 0.762130856513977, + 0.7903269529342651, + 0.8185229301452637 + ] + } + }, + "transformer.layers.0.2.to_q.weight": { + "min": -0.296941339969635, + "max": 0.2655627429485321, + "mean": -0.0004258690751157701, + "std": 0.03210259974002838, + "abs_mean": 0.024999314919114113, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.87555694580078, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 4, + 5, + 13, + 29, + 41, + 80, + 134, + 166, + 162, + 131, + 88, + 69, + 36, + 14, + 20, + 3, + 2, + 0, + 2 + ], + "bin_edges": [ + -0.11262407153844833, + -0.10048417001962662, + -0.0883442685008049, + -0.07620436698198318, + -0.06406446546316147, + -0.05192456394433975, + -0.039784662425518036, + -0.02764476090669632, + -0.015504859387874603, + -0.003364957869052887, + 0.00877494364976883, + 0.020914845168590546, + 0.03305474668741226, + 0.04519464820623398, + 0.057334549725055695, + 0.06947445124387741, + 0.08161435276269913, + 0.09375425428152084, + 0.10589415580034256, + 0.11803405731916428, + 0.1301739513874054 + ] + } + }, + "transformer.layers.0.2.to_q.bias": { + "min": -0.09266690164804459, + "max": 0.12469176203012466, + "mean": 0.0006477286806330085, + "std": 0.025720255449414253, + "abs_mean": 0.019480330869555473, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8229072690010071, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 3, + 12, + 16, + 26, + 53, + 109, + 175, + 195, + 161, + 112, + 70, + 34, + 12, + 9, + 8, + 0, + 3, + 0, + 1 + ], + "bin_edges": [ + -0.09266690164804459, + -0.0817989706993103, + -0.07093103229999542, + -0.06006310135126114, + -0.049195170402526855, + -0.03832723945379257, + -0.02745930105447769, + -0.016591370105743408, + -0.005723439157009125, + 0.005144491791725159, + 0.016012422740459442, + 0.026880361139774323, + 0.0377482995390892, + 0.04861622303724289, + 0.05948416143655777, + 0.07035208493471146, + 0.08122002333402634, + 0.09208796173334122, + 0.1029558852314949, + 0.11382382363080978, + 0.12469176203012466 + ] + } + }, + "transformer.layers.0.2.to_k.weight": { + "min": -0.2905982434749603, + "max": 0.28104421496391296, + "mean": -7.510318391723558e-05, + "std": 0.03093179315328598, + "abs_mean": 0.023867137730121613, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 31.673906326293945, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 11, + 12, + 21, + 53, + 86, + 111, + 131, + 143, + 141, + 111, + 80, + 36, + 33, + 13, + 5, + 2, + 3, + 1 + ], + "bin_edges": [ + -0.1047215387225151, + -0.09359753876924515, + -0.08247353136539459, + -0.07134953141212463, + -0.060225531458854675, + -0.04910153150558472, + -0.03797752410173416, + -0.026853524148464203, + -0.015729524195194244, + -0.004605524241924286, + 0.006518475711345673, + 0.017642483115196228, + 0.028766490519046783, + 0.03989049047231674, + 0.0510144904255867, + 0.06213849037885666, + 0.07326249033212662, + 0.08438649028539658, + 0.09551049023866653, + 0.10663449019193649, + 0.11775848269462585 + ] + } + }, + "transformer.layers.0.2.to_k.bias": { + "min": -5.890929698944092, + "max": 5.805842876434326, + "mean": -0.009318170137703419, + "std": 1.2943130731582642, + "abs_mean": 0.8134283423423767, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 41.39886474609375, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 9, + 4, + 3, + 2, + 3, + 31, + 44, + 105, + 260, + 304, + 120, + 55, + 30, + 7, + 4, + 0, + 4, + 5, + 6 + ], + "bin_edges": [ + -5.890929698944092, + -5.30609130859375, + -4.72125244140625, + -4.13641357421875, + -3.551575183868408, + -2.9667365550994873, + -2.3818979263305664, + -1.7970595359802246, + -1.2122206687927246, + -0.6273818016052246, + -0.04254341125488281, + 0.542294979095459, + 1.127133846282959, + 1.711972713470459, + 2.2968106269836426, + 2.8816494941711426, + 3.4664883613586426, + 4.051327228546143, + 4.636166095733643, + 5.221004009246826, + 5.805842876434326 + ] + } + }, + "transformer.layers.0.2.to_v.weight": { + "min": -0.42498156428337097, + "max": 0.3436700105667114, + "mean": 9.804974979488179e-05, + "std": 0.029953550547361374, + "abs_mean": 0.021628363057971, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 30.67213249206543, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 10, + 13, + 26, + 45, + 154, + 271, + 282, + 115, + 54, + 21, + 4, + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.1268398016691208, + -0.10867056995630264, + -0.0905013382434845, + -0.07233210653066635, + -0.054162874817848206, + -0.03599364310503006, + -0.017824411392211914, + 0.0003448277711868286, + 0.018514052033424377, + 0.036683276295661926, + 0.05485251545906067, + 0.07302175462245941, + 0.09119097888469696, + 0.10936020314693451, + 0.12752945721149445, + 0.145698681473732, + 0.16386790573596954, + 0.1820371299982071, + 0.20020635426044464, + 0.21837560832500458, + 0.23654483258724213 + ] + } + }, + "transformer.layers.0.2.to_v.bias": { + "min": -0.029002565890550613, + "max": 0.027599314227700233, + "mean": -0.0003237572673242539, + "std": 0.01257046777755022, + "abs_mean": 0.010674269869923592, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4021919369697571, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 18, + 26, + 44, + 55, + 60, + 72, + 89, + 62, + 66, + 62, + 59, + 83, + 80, + 69, + 51, + 40, + 31, + 16, + 15 + ], + "bin_edges": [ + -0.029002565890550613, + -0.026172472164034843, + -0.023342378437519073, + -0.020512282848358154, + -0.017682189121842384, + -0.014852095395326614, + -0.012021999806165695, + -0.009191906079649925, + -0.006361812353134155, + -0.0035317186266183853, + -0.0007016249001026154, + 0.002128470689058304, + 0.004958566278219223, + 0.007788658142089844, + 0.010618753731250763, + 0.013448845595121384, + 0.016278941184282303, + 0.019109036773443222, + 0.021939128637313843, + 0.024769224226474762, + 0.027599314227700233 + ] + } + }, + "transformer.layers.0.2.to_out.0.weight": { + "min": -0.45393431186676025, + "max": 0.44807320833206177, + "mean": 2.389570181549061e-05, + "std": 0.023853935301303864, + "abs_mean": 0.01586836948990822, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 24.425954818725586, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 2, + 2, + 3, + 6, + 13, + 59, + 227, + 427, + 185, + 49, + 16, + 6, + 3, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.2009379267692566, + -0.1832694113254547, + -0.16560088098049164, + -0.14793236553668976, + -0.13026383519172668, + -0.1125953197479248, + -0.09492680430412292, + -0.07725828140974045, + -0.05958975851535797, + -0.04192124307155609, + -0.024252712726593018, + -0.006584197282791138, + 0.011084318161010742, + 0.028752848505973816, + 0.046421363949775696, + 0.06408989429473877, + 0.08175840973854065, + 0.09942692518234253, + 0.11709544062614441, + 0.13476398587226868, + 0.15243251621723175 + ] + } + }, + "transformer.layers.0.2.to_out.0.bias": { + "min": -0.0885927751660347, + "max": 0.09089276939630508, + "mean": 0.0022863608319312334, + "std": 0.019503755494952202, + "abs_mean": 0.01498686708509922, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.6280912160873413, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 4, + 3, + 4, + 12, + 27, + 83, + 144, + 204, + 181, + 162, + 85, + 49, + 21, + 13, + 4, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.0885927751660347, + -0.07961849868297577, + -0.07064422219991684, + -0.06166994199156761, + -0.05269566550850868, + -0.04372138902544975, + -0.034747108817100525, + -0.025772832334041595, + -0.016798555850982666, + -0.007824279367923737, + 0.0011499971151351929, + 0.010124273598194122, + 0.01909855753183365, + 0.028072834014892578, + 0.03704711049795151, + 0.04602137953042984, + 0.054995663464069366, + 0.06396994739770889, + 0.07294421643018723, + 0.08191850036382675, + 0.09089276939630508 + ] + } + }, + "transformer.layers.0.3.g": { + "min": 0.2667909264564514, + "max": 1.0541586875915527, + "mean": 0.5309650301933289, + "std": 0.10402658581733704, + "abs_mean": 0.5309650301933289, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 17.31359100341797, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 35, + 111, + 141, + 159, + 163, + 105, + 77, + 78, + 60, + 32, + 13, + 13, + 2, + 2, + 0, + 3, + 0, + 1 + ], + "bin_edges": [ + 0.2667909264564514, + 0.3061593174934387, + 0.345527708530426, + 0.38489609956741333, + 0.42426449060440063, + 0.46363288164138794, + 0.5030012130737305, + 0.5423696041107178, + 0.5817379951477051, + 0.6211063861846924, + 0.6604747772216797, + 0.699843168258667, + 0.7392115592956543, + 0.7785799503326416, + 0.8179483413696289, + 0.8573167324066162, + 0.8966851234436035, + 0.9360535144805908, + 0.9754219055175781, + 1.0147902965545654, + 1.0541586875915527 + ] + } + }, + "transformer.layers.0.4.ff.0.0.weight": { + "min": -0.5743634104728699, + "max": 0.6081749796867371, + "mean": -0.0004296167753636837, + "std": 0.03860084339976311, + "abs_mean": 0.02932225726544857, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 79.05066680908203, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 2, + 2, + 5, + 13, + 20, + 48, + 103, + 147, + 198, + 181, + 143, + 76, + 38, + 14, + 3, + 4, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.16866450011730194, + -0.15148140490055084, + -0.13429829478263855, + -0.11711519956588745, + -0.09993210434913635, + -0.08274900913238525, + -0.06556590646505356, + -0.04838280379772186, + -0.031199708580970764, + -0.014016613364219666, + 0.003166481852531433, + 0.020349591970443726, + 0.037532687187194824, + 0.05471578240394592, + 0.07189889252185822, + 0.08908198773860931, + 0.10626508295536041, + 0.12344817817211151, + 0.1406312733888626, + 0.1578143686056137, + 0.1749974638223648 + ] + } + }, + "transformer.layers.0.4.ff.0.0.bias": { + "min": -0.18247899413108826, + "max": 0.04562002047896385, + "mean": -0.029428046196699142, + "std": 0.04256246238946915, + "abs_mean": 0.03553260490298271, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 3.3114237785339355, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 3, + 6, + 9, + 17, + 31, + 42, + 43, + 40, + 38, + 28, + 32, + 48, + 80, + 143, + 181, + 157, + 80, + 20, + 1 + ], + "bin_edges": [ + -0.17975062131881714, + -0.16848209500312805, + -0.15721355378627777, + -0.14594502747058868, + -0.1346764862537384, + -0.12340795993804932, + -0.11213943362236023, + -0.10087089985609055, + -0.08960236608982086, + -0.07833383232355118, + -0.0670652985572815, + -0.05579677224159241, + -0.04452824592590332, + -0.03325970470905304, + -0.021991178393363953, + -0.010722637176513672, + 0.000545889139175415, + 0.011814415454864502, + 0.023082956671714783, + 0.03435148298740387, + 0.04562002047896385 + ] + } + }, + "transformer.layers.0.4.ff.2.weight": { + "min": -1.1666945219039917, + "max": 1.633580207824707, + "mean": 0.00032344614737667143, + "std": 0.027696726843714714, + "abs_mean": 0.02006993629038334, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 56.71977615356445, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 39, + 262, + 490, + 188, + 14, + 3, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.2090015709400177, + -0.176393523812294, + -0.1437854766845703, + -0.11117742955684662, + -0.07856938242912292, + -0.04596133530139923, + -0.013353288173675537, + 0.019254758954048157, + 0.05186280608177185, + 0.08447083830833435, + 0.11707890033721924, + 0.14968696236610413, + 0.18229499459266663, + 0.21490302681922913, + 0.247511088848114, + 0.2801191508769989, + 0.3127271831035614, + 0.3453352153301239, + 0.3779432475566864, + 0.4105513393878937, + 0.4431593716144562 + ] + } + }, + "transformer.layers.0.4.ff.2.bias": { + "min": -0.16206279397010803, + "max": 0.20534056425094604, + "mean": -0.02111881598830223, + "std": 0.027917111292481422, + "abs_mean": 0.027663614600896835, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.1198209524154663, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 7, + 19, + 84, + 198, + 278, + 259, + 99, + 37, + 10, + 3, + 0, + 0, + 2, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.16206279397010803, + -0.14369262754917145, + -0.12532246112823486, + -0.10695228725671768, + -0.0885821208357811, + -0.07021195441484451, + -0.05184178054332733, + -0.03347161412239075, + -0.015101447701454163, + 0.003268718719482422, + 0.021638885140419006, + 0.04000905156135559, + 0.05837923288345337, + 0.07674939930438995, + 0.09511956572532654, + 0.11348971724510193, + 0.1318598985671997, + 0.15023007988929749, + 0.16860023140907288, + 0.18697041273117065, + 0.20534056425094604 + ] + } + }, + "transformer.layers.1.1.g": { + "min": 0.22404542565345764, + "max": 0.8422443866729736, + "mean": 0.4874877631664276, + "std": 0.07493799924850464, + "abs_mean": 0.4874877631664276, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 15.782669067382812, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 6, + 9, + 13, + 36, + 66, + 116, + 169, + 178, + 152, + 118, + 72, + 28, + 20, + 8, + 3, + 2, + 0, + 0, + 3 + ], + "bin_edges": [ + 0.22404542565345764, + 0.25495538115501404, + 0.28586533665657043, + 0.31677526235580444, + 0.34768521785736084, + 0.37859517335891724, + 0.40950512886047363, + 0.44041508436203003, + 0.4713250398635864, + 0.502234935760498, + 0.5331449508666992, + 0.5640548467636108, + 0.594964861869812, + 0.6258747577667236, + 0.65678471326828, + 0.6876946687698364, + 0.7186046242713928, + 0.7495145797729492, + 0.7804244756698608, + 0.811334490776062, + 0.8422443866729736 + ] + } + }, + "transformer.layers.1.2.to_q.weight": { + "min": -0.255166620016098, + "max": 0.305690199136734, + "mean": -6.7684013629332185e-06, + "std": 0.03347513824701309, + "abs_mean": 0.02612040936946869, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 34.27812957763672, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 6, + 5, + 31, + 52, + 79, + 127, + 150, + 159, + 135, + 95, + 70, + 36, + 27, + 13, + 7, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.11421064287424088, + -0.1019379198551178, + -0.08966518938541412, + -0.07739246636629105, + -0.06511974334716797, + -0.05284702032804489, + -0.04057428985834122, + -0.02830156683921814, + -0.016028843820095062, + -0.003756120800971985, + 0.008516602218151093, + 0.020789332687854767, + 0.03306206315755844, + 0.04533477872610092, + 0.057607509195804596, + 0.06988022476434708, + 0.08215295523405075, + 0.09442568570375443, + 0.1066984012722969, + 0.11897113174200058, + 0.13124385476112366 + ] + } + }, + "transformer.layers.1.2.to_q.bias": { + "min": -0.09524397552013397, + "max": 0.11034096777439117, + "mean": 6.5918720792979e-05, + "std": 0.026950189843773842, + "abs_mean": 0.021233120933175087, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8619874715805054, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 1, + 3, + 9, + 24, + 52, + 99, + 122, + 165, + 138, + 126, + 118, + 63, + 46, + 9, + 9, + 7, + 1, + 1, + 3 + ], + "bin_edges": [ + -0.09524397552013397, + -0.08496472984552383, + -0.0746854841709137, + -0.06440623104572296, + -0.054126985371112823, + -0.043847739696502686, + -0.03356849029660225, + -0.023289240896701813, + -0.013009995222091675, + -0.002730749547481537, + 0.007548496127128601, + 0.017827749252319336, + 0.028106994926929474, + 0.03838624060153961, + 0.04866549372673035, + 0.05894473195075989, + 0.06922398507595062, + 0.07950323820114136, + 0.0897824764251709, + 0.10006172955036163, + 0.11034096777439117 + ] + } + }, + "transformer.layers.1.2.to_k.weight": { + "min": -0.29684391617774963, + "max": 0.295682817697525, + "mean": 5.335842797649093e-05, + "std": 0.03254625201225281, + "abs_mean": 0.025315403938293457, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.326988220214844, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 1, + 5, + 10, + 11, + 37, + 78, + 121, + 160, + 157, + 154, + 111, + 76, + 42, + 20, + 6, + 5, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.12574371695518494, + -0.11253877729177475, + -0.09933383762836456, + -0.08612889796495438, + -0.07292395830154419, + -0.059719018638134, + -0.046514078974723816, + -0.03330913931131363, + -0.020104199647903442, + -0.006899259984493256, + 0.006305679678916931, + 0.019510626792907715, + 0.032715559005737305, + 0.045920491218566895, + 0.05912543833255768, + 0.07233038544654846, + 0.08553531765937805, + 0.09874024987220764, + 0.11194519698619843, + 0.1251501441001892, + 0.1383550763130188 + ] + } + }, + "transformer.layers.1.2.to_k.bias": { + "min": -5.156938552856445, + "max": 5.0772905349731445, + "mean": -0.014555896632373333, + "std": 1.1561553478240967, + "abs_mean": 0.6372154355049133, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 36.981834411621094, + "elements": 1024, + "histogram": { + "counts": [ + 6, + 10, + 2, + 7, + 8, + 6, + 13, + 31, + 72, + 308, + 364, + 75, + 45, + 21, + 10, + 5, + 0, + 7, + 2, + 8 + ], + "bin_edges": [ + -5.156938552856445, + -4.645226955413818, + -4.133515357971191, + -3.6218042373657227, + -3.1100926399230957, + -2.5983810424804688, + -2.086669683456421, + -1.574958324432373, + -1.063246726989746, + -0.5515351295471191, + -0.03982353210449219, + 0.47188758850097656, + 0.9835991859436035, + 1.4953107833862305, + 2.007021903991699, + 2.518733501434326, + 3.030445098876953, + 3.542156219482422, + 4.053868293762207, + 4.565579414367676, + 5.0772905349731445 + ] + } + }, + "transformer.layers.1.2.to_v.weight": { + "min": -0.3448536694049835, + "max": 0.34325698018074036, + "mean": 7.860038749640808e-05, + "std": 0.0300619974732399, + "abs_mean": 0.02246847189962864, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 30.78302574157715, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 11, + 37, + 72, + 152, + 269, + 249, + 120, + 58, + 17, + 2, + 3, + 2, + 0, + 0, + 0, + 1, + 1, + 2 + ], + "bin_edges": [ + -0.11359164118766785, + -0.09682722389698029, + -0.08006280660629272, + -0.06329838931560516, + -0.0465339720249176, + -0.02976955473423004, + -0.01300513744354248, + 0.0037592798471450806, + 0.02052369713783264, + 0.0372881144285202, + 0.054052531719207764, + 0.07081694900989532, + 0.08758136630058289, + 0.10434578359127045, + 0.12111020088195801, + 0.13787463307380676, + 0.15463903546333313, + 0.1714034378528595, + 0.18816787004470825, + 0.204932302236557, + 0.22169671952724457 + ] + } + }, + "transformer.layers.1.2.to_v.bias": { + "min": -0.03601115196943283, + "max": 0.03331650421023369, + "mean": -0.0001408920797985047, + "std": 0.013034623116254807, + "abs_mean": 0.01087227649986744, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.41692858934402466, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 4, + 10, + 26, + 38, + 69, + 64, + 76, + 82, + 99, + 83, + 79, + 95, + 82, + 68, + 61, + 35, + 16, + 7, + 3 + ], + "bin_edges": [ + -0.03601115196943283, + -0.03254476934671402, + -0.02907838672399521, + -0.025612004101276398, + -0.022145621478557587, + -0.018679238855838776, + -0.015212856233119965, + -0.011746473610401154, + -0.008280090987682343, + -0.0048137083649635315, + -0.0013473257422447205, + 0.0021190568804740906, + 0.005585439503192902, + 0.009051822125911713, + 0.012518204748630524, + 0.015984587371349335, + 0.019450969994068146, + 0.022917352616786957, + 0.026383735239505768, + 0.02985011786222458, + 0.03331650421023369 + ] + } + }, + "transformer.layers.1.2.to_out.0.weight": { + "min": -0.31532466411590576, + "max": 0.3747538924217224, + "mean": -2.0682646209024824e-05, + "std": 0.024059493094682693, + "abs_mean": 0.017171449959278107, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 24.636526107788086, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 0, + 2, + 0, + 4, + 0, + 8, + 16, + 50, + 125, + 272, + 278, + 150, + 57, + 24, + 6, + 3, + 0, + 2, + 1 + ], + "bin_edges": [ + -0.15282730758190155, + -0.13901641964912415, + -0.12520551681518555, + -0.11139462888240814, + -0.09758373349905014, + -0.08377283811569214, + -0.06996195018291473, + -0.05615105479955673, + -0.04234015941619873, + -0.02852926403284073, + -0.014718368649482727, + -0.0009074807167053223, + 0.012903407216072083, + 0.02671431005001068, + 0.040525197982788086, + 0.054336100816726685, + 0.06814698874950409, + 0.0819578766822815, + 0.09576877951622009, + 0.1095796674489975, + 0.1233905628323555 + ] + } + }, + "transformer.layers.1.2.to_out.0.bias": { + "min": -0.10526668280363083, + "max": 0.12198653072118759, + "mean": -0.001968209631741047, + "std": 0.0288400761783123, + "abs_mean": 0.022421324625611305, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.924579381942749, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 4, + 17, + 23, + 48, + 103, + 133, + 171, + 146, + 154, + 83, + 56, + 26, + 17, + 4, + 8, + 3, + 1, + 1 + ], + "bin_edges": [ + -0.10526668280363083, + -0.09390401840209961, + -0.08254136145114899, + -0.07117870450019836, + -0.059816040098667145, + -0.048453379422426224, + -0.0370907187461853, + -0.025728054344654083, + -0.01436539739370346, + -0.003002740442752838, + 0.008359923958778381, + 0.0197225883603096, + 0.031085245311260223, + 0.042447902262210846, + 0.05381057411432266, + 0.06517323106527328, + 0.07653588801622391, + 0.08789854496717453, + 0.09926120191812515, + 0.11062387377023697, + 0.12198653072118759 + ] + } + }, + "transformer.layers.1.3.g": { + "min": 0.3114672601222992, + "max": 1.1185976266860962, + "mean": 0.6660763025283813, + "std": 0.09736555069684982, + "abs_mean": 0.6660763025283813, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.540740966796875, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 3, + 11, + 18, + 20, + 54, + 89, + 129, + 211, + 200, + 97, + 77, + 47, + 27, + 7, + 1, + 4, + 0, + 0, + 1 + ], + "bin_edges": [ + 0.3114672601222992, + 0.3518237769603729, + 0.39218029379844666, + 0.4325368106365204, + 0.4728933274745941, + 0.5132498741149902, + 0.553606390953064, + 0.5939629077911377, + 0.6343194246292114, + 0.6746759414672852, + 0.7150324583053589, + 0.7553889751434326, + 0.7957454919815063, + 0.8361020088195801, + 0.8764585256576538, + 0.9168150424957275, + 0.9571715593338013, + 0.997528076171875, + 1.0378845930099487, + 1.0782411098480225, + 1.1185976266860962 + ] + } + }, + "transformer.layers.1.4.ff.0.0.weight": { + "min": -0.872668981552124, + "max": 0.6275054216384888, + "mean": 0.0016755885444581509, + "std": 0.04743882641196251, + "abs_mean": 0.035196080803871155, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 97.20372009277344, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 1, + 5, + 6, + 13, + 48, + 132, + 221, + 272, + 179, + 78, + 22, + 11, + 5, + 4, + 2 + ], + "bin_edges": [ + -0.33325523138046265, + -0.3059375286102295, + -0.27861982583999634, + -0.2513021230697632, + -0.22398442029953003, + -0.19666671752929688, + -0.16934901475906372, + -0.14203131198883057, + -0.11471360921859741, + -0.08739590644836426, + -0.060078203678131104, + -0.03276050090789795, + -0.005442798137664795, + 0.02187490463256836, + 0.049192607402801514, + 0.07651031017303467, + 0.10382801294326782, + 0.13114571571350098, + 0.15846341848373413, + 0.18578112125396729, + 0.21309885382652283 + ] + } + }, + "transformer.layers.1.4.ff.0.0.bias": { + "min": -0.2710971236228943, + "max": 0.03426326811313629, + "mean": -0.0465819425880909, + "std": 0.04054969921708107, + "abs_mean": 0.04834957420825958, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 3.9523580074310303, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 0, + 1, + 5, + 4, + 5, + 4, + 8, + 26, + 35, + 66, + 77, + 68, + 117, + 105, + 126, + 128, + 137, + 68, + 19 + ], + "bin_edges": [ + -0.2314978539943695, + -0.2185397893190384, + -0.20558173954486847, + -0.19262367486953735, + -0.17966562509536743, + -0.16670756042003632, + -0.1537494957447052, + -0.14079144597053528, + -0.12783338129520416, + -0.11487532407045364, + -0.10191726684570312, + -0.08895920217037201, + -0.0760011374950409, + -0.06304308772087097, + -0.050085023045539856, + -0.037126973271369934, + -0.02416890859603882, + -0.011210843920707703, + 0.0017472058534622192, + 0.014705270528793335, + 0.02766331285238266 + ] + } + }, + "transformer.layers.1.4.ff.2.weight": { + "min": -0.922234833240509, + "max": 0.9643772840499878, + "mean": 0.0010214494541287422, + "std": 0.04070669412612915, + "abs_mean": 0.027846619486808777, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 83.38308715820312, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 1, + 1, + 2, + 4, + 4, + 23, + 79, + 267, + 354, + 183, + 50, + 14, + 9, + 3, + 1, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.2802606523036957, + -0.25296658277511597, + -0.22567248344421387, + -0.19837841391563416, + -0.17108432948589325, + -0.14379024505615234, + -0.11649617552757263, + -0.08920209109783173, + -0.06190800666809082, + -0.034613922238349915, + -0.007319837808609009, + 0.019974231719970703, + 0.047268301248550415, + 0.07456240057945251, + 0.10185647010803223, + 0.12915056943893433, + 0.15644463896751404, + 0.18373870849609375, + 0.21103280782699585, + 0.23832687735557556, + 0.26562100648880005 + ] + } + }, + "transformer.layers.1.4.ff.2.bias": { + "min": -0.14429129660129547, + "max": 0.07484762370586395, + "mean": -0.00908473040908575, + "std": 0.025672495365142822, + "abs_mean": 0.020597003400325775, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8710619211196899, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 1, + 2, + 2, + 10, + 23, + 39, + 71, + 106, + 150, + 195, + 182, + 111, + 56, + 29, + 15, + 3, + 3 + ], + "bin_edges": [ + -0.14429129660129547, + -0.13333435356616974, + -0.12237740308046341, + -0.11142046004533768, + -0.10046350955963135, + -0.08950656652450562, + -0.07854962348937988, + -0.06759267300367355, + -0.05663572996854782, + -0.04567878693342209, + -0.03472183644771576, + -0.023764893412590027, + -0.012807950377464294, + -0.001851007342338562, + 0.009105950593948364, + 0.020062893629074097, + 0.03101983666419983, + 0.04197677969932556, + 0.052933722734451294, + 0.06389068067073822, + 0.07484762370586395 + ] + } + }, + "transformer.layers.2.1.g": { + "min": 0.2402428686618805, + "max": 0.711609423160553, + "mean": 0.44710344076156616, + "std": 0.05906940996646881, + "abs_mean": 0.44710344076156616, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 14.431511878967285, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 8, + 19, + 24, + 59, + 107, + 162, + 181, + 162, + 113, + 69, + 43, + 23, + 10, + 10, + 2, + 4, + 1, + 1 + ], + "bin_edges": [ + 0.2402428686618805, + 0.2638112008571625, + 0.28737953305244446, + 0.31094783544540405, + 0.33451616764068604, + 0.358084499835968, + 0.38165283203125, + 0.405221164226532, + 0.42878949642181396, + 0.45235782861709595, + 0.47592616081237793, + 0.4994944930076599, + 0.5230628252029419, + 0.5466310977935791, + 0.5701994895935059, + 0.5937677621841431, + 0.6173361539840698, + 0.640904426574707, + 0.6644728183746338, + 0.688041090965271, + 0.711609423160553 + ] + } + }, + "transformer.layers.2.2.to_q.weight": { + "min": -0.27207210659980774, + "max": 0.29753801226615906, + "mean": 9.350538675789721e-06, + "std": 0.035469669848680496, + "abs_mean": 0.027727492153644562, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 36.32040023803711, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 6, + 10, + 15, + 32, + 57, + 94, + 124, + 148, + 143, + 143, + 90, + 66, + 37, + 23, + 7, + 1, + 3 + ], + "bin_edges": [ + -0.14873576164245605, + -0.13538120687007904, + -0.12202665209770203, + -0.10867208987474442, + -0.0953175351023674, + -0.08196298032999039, + -0.06860841810703278, + -0.05525386333465576, + -0.04189930856227875, + -0.028544753789901733, + -0.01519019901752472, + -0.001835644245147705, + 0.011518925428390503, + 0.024873480200767517, + 0.03822803497314453, + 0.051582589745521545, + 0.06493714451789856, + 0.07829169929027557, + 0.09164625406265259, + 0.1050008237361908, + 0.11835535615682602 + ] + } + }, + "transformer.layers.2.2.to_q.bias": { + "min": -0.11918215453624725, + "max": 0.1183757483959198, + "mean": 0.0007599537493661046, + "std": 0.027609599754214287, + "abs_mean": 0.020609423518180847, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8834105134010315, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 4, + 5, + 9, + 10, + 37, + 84, + 141, + 188, + 201, + 134, + 88, + 51, + 24, + 10, + 7, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.11918215453624725, + -0.10730426013469696, + -0.09542636573314667, + -0.08354847133159637, + -0.07167057693004608, + -0.05979267880320549, + -0.0479147806763649, + -0.036036886274814606, + -0.024158991873264313, + -0.01228109747171402, + -0.0004032030701637268, + 0.011474698781967163, + 0.023352593183517456, + 0.03523048758506775, + 0.04710838198661804, + 0.058986276388168335, + 0.07086417078971863, + 0.08274206519126892, + 0.09461995959281921, + 0.1064978539943695, + 0.1183757483959198 + ] + } + }, + "transformer.layers.2.2.to_k.weight": { + "min": -0.2805421054363251, + "max": 0.2793859839439392, + "mean": -7.715764513704926e-05, + "std": 0.035099178552627563, + "abs_mean": 0.027376187965273857, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.94107437133789, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 2, + 7, + 17, + 20, + 42, + 64, + 102, + 133, + 120, + 140, + 117, + 88, + 57, + 35, + 24, + 17, + 5, + 5 + ], + "bin_edges": [ + -0.12107668071985245, + -0.11005239933729172, + -0.09902812540531158, + -0.08800384402275085, + -0.07697956264019012, + -0.06595528870820999, + -0.05493100732564926, + -0.04390673339366913, + -0.0328824520111084, + -0.02185817062854767, + -0.010833896696567535, + 0.00019038468599319458, + 0.011214666068553925, + 0.022238947451114655, + 0.03326321393251419, + 0.04428749531507492, + 0.05531177669763565, + 0.06633605808019638, + 0.07736033946275711, + 0.08838460594415665, + 0.09940888732671738 + ] + } + }, + "transformer.layers.2.2.to_k.bias": { + "min": -2.506035566329956, + "max": 2.518012046813965, + "mean": 0.026713747531175613, + "std": 0.5862806439399719, + "abs_mean": 0.39693859219551086, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 18.77129364013672, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 2, + 4, + 10, + 27, + 28, + 52, + 128, + 249, + 256, + 101, + 57, + 31, + 19, + 9, + 7, + 10, + 4, + 4 + ], + "bin_edges": [ + -2.506035566329956, + -2.254833221435547, + -2.0036306381225586, + -1.7524282932281494, + -1.5012259483337402, + -1.250023603439331, + -0.9988211393356323, + -0.7476186752319336, + -0.4964163303375244, + -0.24521398544311523, + 0.005988359451293945, + 0.2571909427642822, + 0.5083932876586914, + 0.7595956325531006, + 1.0107982158660889, + 1.262000560760498, + 1.5132029056549072, + 1.7644054889678955, + 2.0156075954437256, + 2.266810178756714, + 2.518012046813965 + ] + } + }, + "transformer.layers.2.2.to_v.weight": { + "min": -0.22091814875602722, + "max": 0.27132153511047363, + "mean": 2.8913418645970523e-06, + "std": 0.0307327788323164, + "abs_mean": 0.023796094581484795, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 31.469972610473633, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 6, + 11, + 31, + 45, + 87, + 131, + 146, + 159, + 142, + 90, + 67, + 37, + 21, + 10, + 5, + 2, + 3, + 2 + ], + "bin_edges": [ + -0.10520875453948975, + -0.09382228553295135, + -0.08243580907583237, + -0.07104934006929398, + -0.059662867337465286, + -0.0482763946056366, + -0.036889925599098206, + -0.025503449141979218, + -0.014116980135440826, + -0.0027305111289024353, + 0.008655965328216553, + 0.020042434334754944, + 0.031428903341293335, + 0.042815372347831726, + 0.05420185625553131, + 0.0655883252620697, + 0.0769747942686081, + 0.08836126327514648, + 0.09974773228168488, + 0.11113421618938446, + 0.12252067774534225 + ] + } + }, + "transformer.layers.2.2.to_v.bias": { + "min": -0.03352135419845581, + "max": 0.03120853193104267, + "mean": 0.00011218251165701076, + "std": 0.012406233698129654, + "abs_mean": 0.010367151349782944, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.3968217968940735, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 11, + 27, + 43, + 52, + 82, + 86, + 82, + 81, + 79, + 91, + 87, + 74, + 69, + 56, + 34, + 27, + 11, + 5 + ], + "bin_edges": [ + -0.03352135419845581, + -0.030284859240055084, + -0.027048366144299507, + -0.02381187118589878, + -0.020575378090143204, + -0.017338883131742477, + -0.014102388173341751, + -0.010865895077586174, + -0.007629400119185448, + -0.004392905160784721, + -0.0011564120650291443, + 0.002080082893371582, + 0.005316577851772308, + 0.008553072810173035, + 0.011789564043283463, + 0.015026059001684189, + 0.018262553960084915, + 0.02149904891848564, + 0.024735543876886368, + 0.027972035109996796, + 0.03120853193104267 + ] + } + }, + "transformer.layers.2.2.to_out.0.weight": { + "min": -0.2351619005203247, + "max": 0.23147742450237274, + "mean": 5.6937635235954076e-05, + "std": 0.0256962887942791, + "abs_mean": 0.019686853513121605, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 26.312692642211914, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 2, + 0, + 1, + 4, + 10, + 21, + 55, + 142, + 188, + 196, + 179, + 107, + 65, + 16, + 6, + 4, + 2, + 1 + ], + "bin_edges": [ + -0.14605475962162018, + -0.1331682950258255, + -0.12028183043003082, + -0.10739536583423615, + -0.09450890123844147, + -0.08162243664264679, + -0.06873597204685211, + -0.055849507451057434, + -0.042963042855262756, + -0.03007657825946808, + -0.0171901136636734, + -0.004303649067878723, + 0.008582815527915955, + 0.021469280123710632, + 0.03435574471950531, + 0.04724220931529999, + 0.060128673911094666, + 0.07301513850688934, + 0.08590160310268402, + 0.0987880676984787, + 0.11167454719543457 + ] + } + }, + "transformer.layers.2.2.to_out.0.bias": { + "min": -0.1356453150510788, + "max": 0.1271977722644806, + "mean": -0.005494291428476572, + "std": 0.0399438738822937, + "abs_mean": 0.03177820146083832, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.2896206378936768, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 7, + 18, + 27, + 46, + 68, + 92, + 105, + 146, + 131, + 102, + 117, + 64, + 33, + 18, + 8, + 6, + 4, + 5 + ], + "bin_edges": [ + -0.1356453150510788, + -0.12250316143035889, + -0.10936100780963898, + -0.09621885418891907, + -0.08307670056819916, + -0.06993454694747925, + -0.05679239332675934, + -0.04365023970603943, + -0.03050808608531952, + -0.01736593246459961, + -0.0042237788438797, + 0.00891837477684021, + 0.02206052839756012, + 0.03520268201828003, + 0.04834483563899994, + 0.06148698925971985, + 0.07462914288043976, + 0.08777129650115967, + 0.10091345012187958, + 0.11405560374259949, + 0.1271977722644806 + ] + } + }, + "transformer.layers.2.3.g": { + "min": 0.3544028699398041, + "max": 1.1697261333465576, + "mean": 0.7103750109672546, + "std": 0.10338432341814041, + "abs_mean": 0.7103750109672546, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.97124481201172, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 5, + 14, + 33, + 41, + 38, + 82, + 136, + 164, + 153, + 148, + 123, + 40, + 10, + 4, + 2, + 3, + 1, + 0, + 1 + ], + "bin_edges": [ + 0.3544028699398041, + 0.3951690196990967, + 0.43593519926071167, + 0.4767013490200043, + 0.5174674987792969, + 0.5582336783409119, + 0.5989998579025269, + 0.6397659778594971, + 0.6805321574211121, + 0.721298336982727, + 0.7620644569396973, + 0.8028306365013123, + 0.8435968160629272, + 0.8843629360198975, + 0.9251291751861572, + 0.9658952951431274, + 1.0066614151000977, + 1.0474276542663574, + 1.0881937742233276, + 1.1289598941802979, + 1.1697261333465576 + ] + } + }, + "transformer.layers.2.4.ff.0.0.weight": { + "min": -0.6172477006912231, + "max": 0.5542004108428955, + "mean": 0.001160221640020609, + "std": 0.046119727194309235, + "abs_mean": 0.03523973003029823, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 94.47351837158203, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 3, + 3, + 7, + 18, + 37, + 76, + 106, + 169, + 199, + 171, + 119, + 57, + 19, + 8, + 1, + 5, + 1 + ], + "bin_edges": [ + -0.2309630662202835, + -0.21040646731853485, + -0.1898498833179474, + -0.16929328441619873, + -0.14873668551445007, + -0.12818008661270142, + -0.10762349516153336, + -0.0870669037103653, + -0.06651030480861664, + -0.04595370590686798, + -0.025397107005119324, + -0.00484052300453186, + 0.015716075897216797, + 0.03627265989780426, + 0.05682925879955292, + 0.07738585770130157, + 0.09794245660305023, + 0.11849905550479889, + 0.13905565440654755, + 0.1596122533082962, + 0.18016882240772247 + ] + } + }, + "transformer.layers.2.4.ff.0.0.bias": { + "min": -0.18825410306453705, + "max": 0.024966172873973846, + "mean": -0.03482227772474289, + "std": 0.02857418917119503, + "abs_mean": 0.03584485873579979, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.8827545642852783, + "elements": 4096, + "histogram": { + "counts": [ + 2, + 1, + 2, + 0, + 0, + 2, + 2, + 8, + 16, + 19, + 39, + 62, + 65, + 100, + 136, + 167, + 173, + 124, + 61, + 21 + ], + "bin_edges": [ + -0.18825410306453705, + -0.17785300314426422, + -0.1674518883228302, + -0.15705078840255737, + -0.14664968848228455, + -0.13624857366085052, + -0.1258474737405777, + -0.11544636636972427, + -0.10504525899887085, + -0.09464415162801743, + -0.084243044257164, + -0.07384194433689117, + -0.06344083696603775, + -0.053039729595184326, + -0.0426386296749115, + -0.03223751485347748, + -0.02183641493320465, + -0.011435315012931824, + -0.0010342001914978027, + 0.009366899728775024, + 0.019768016412854195 + ] + } + }, + "transformer.layers.2.4.ff.2.weight": { + "min": -1.130850911140442, + "max": 0.9707417488098145, + "mean": 0.0003595067828428, + "std": 0.042347487062215805, + "abs_mean": 0.028345687314867973, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 86.71993255615234, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 2, + 4, + 6, + 16, + 193, + 527, + 203, + 29, + 6, + 8, + 2, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.5004795789718628, + -0.457177996635437, + -0.41387641429901123, + -0.37057486176490784, + -0.32727327942848206, + -0.2839716970920563, + -0.24067014455795288, + -0.1973685622215271, + -0.15406697988510132, + -0.11076539754867554, + -0.06746381521224976, + -0.024162262678146362, + 0.01913928985595703, + 0.06244087219238281, + 0.1057424545288086, + 0.14904403686523438, + 0.19234561920166016, + 0.23564720153808594, + 0.2789487838745117, + 0.3222503662109375, + 0.3655519187450409 + ] + } + }, + "transformer.layers.2.4.ff.2.bias": { + "min": -0.5971466898918152, + "max": 0.06270916759967804, + "mean": -0.004877141211181879, + "std": 0.02859053947031498, + "abs_mean": 0.0179099403321743, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9276728630065918, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 2, + 71, + 449, + 428, + 48 + ], + "bin_edges": [ + -0.5971466898918152, + -0.5641539096832275, + -0.5311611294746399, + -0.49816831946372986, + -0.4651755094528198, + -0.4321827292442322, + -0.39918994903564453, + -0.3661971688270569, + -0.33320435881614685, + -0.3002115786075592, + -0.26721876859664917, + -0.23422598838806152, + -0.20123320817947388, + -0.16824039816856384, + -0.1352476179599762, + -0.10225480794906616, + -0.06926202774047852, + -0.03626924753189087, + -0.0032764673233032227, + 0.0297163724899292, + 0.06270916759967804 + ] + } + }, + "transformer.layers.3.1.g": { + "min": 0.3752330243587494, + "max": 0.9386839866638184, + "mean": 0.5923458337783813, + "std": 0.06656130403280258, + "abs_mean": 0.5923458337783813, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 19.07424545288086, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 5, + 21, + 44, + 127, + 226, + 222, + 136, + 74, + 45, + 30, + 30, + 12, + 15, + 4, + 3, + 2, + 0, + 1 + ], + "bin_edges": [ + 0.3752330243587494, + 0.4034055769443512, + 0.431578129529953, + 0.4597506523132324, + 0.48792320489883423, + 0.516095757484436, + 0.5442683100700378, + 0.5724408626556396, + 0.6006134152412415, + 0.6287859678268433, + 0.6569584608078003, + 0.6851310729980469, + 0.7133035659790039, + 0.7414761781692505, + 0.7696486711502075, + 0.7978212237358093, + 0.8259937763214111, + 0.8541663289070129, + 0.8823388814926147, + 0.9105113744735718, + 0.9386839866638184 + ] + } + }, + "transformer.layers.3.2.to_q.weight": { + "min": -0.3911682367324829, + "max": 0.3688437342643738, + "mean": 7.119165093172342e-05, + "std": 0.037188753485679626, + "abs_mean": 0.02899729274213314, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.08082580566406, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 9, + 10, + 18, + 47, + 66, + 127, + 164, + 197, + 152, + 97, + 51, + 32, + 19, + 2, + 2, + 3, + 1, + 1 + ], + "bin_edges": [ + -0.138386532664299, + -0.12326744198799133, + -0.10814835131168365, + -0.09302926063537598, + -0.0779101699590683, + -0.06279107928276062, + -0.04767198860645294, + -0.032552897930145264, + -0.017433807253837585, + -0.0023147165775299072, + 0.012804374098777771, + 0.02792346477508545, + 0.04304255545139313, + 0.058161646127700806, + 0.07328073680400848, + 0.08839982748031616, + 0.10351891815662384, + 0.11863799393177032, + 0.1337570995092392, + 0.14887620508670807, + 0.16399529576301575 + ] + } + }, + "transformer.layers.3.2.to_q.bias": { + "min": -0.11875540018081665, + "max": 0.13628698885440826, + "mean": 0.0009287752327509224, + "std": 0.029227793216705322, + "abs_mean": 0.021544134244322777, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9353049397468567, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 4, + 1, + 5, + 9, + 31, + 61, + 130, + 190, + 210, + 153, + 93, + 62, + 19, + 10, + 9, + 5, + 4, + 1, + 1 + ], + "bin_edges": [ + -0.11875540018081665, + -0.1060032844543457, + -0.09325116127729416, + -0.08049904555082321, + -0.06774692237377167, + -0.05499480664730072, + -0.04224269092082977, + -0.02949056774377823, + -0.01673845201730728, + -0.003986336290836334, + 0.00876578688621521, + 0.021517902612686157, + 0.034270018339157104, + 0.04702213406562805, + 0.05977426469326019, + 0.07252638041973114, + 0.08527849614620209, + 0.09803061187267303, + 0.11078272759914398, + 0.12353485822677612, + 0.13628698885440826 + ] + } + }, + "transformer.layers.3.2.to_k.weight": { + "min": -0.6185974478721619, + "max": 0.5083587169647217, + "mean": 1.5249222997226752e-05, + "std": 0.036442261189222336, + "abs_mean": 0.02839103899896145, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 37.31633758544922, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 5, + 12, + 31, + 69, + 120, + 183, + 181, + 183, + 111, + 60, + 23, + 15, + 1, + 3, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.12961581349372864, + -0.11215391755104065, + -0.09469202160835266, + -0.07723013311624527, + -0.05976823717355728, + -0.04230634123086929, + -0.024844452738761902, + -0.007382556796073914, + 0.010079339146614075, + 0.027541235089302063, + 0.04500313103199005, + 0.06246502697467804, + 0.07992690801620483, + 0.09738880395889282, + 0.11485069990158081, + 0.1323125958442688, + 0.1497744917869568, + 0.16723638772964478, + 0.18469828367233276, + 0.20216017961502075, + 0.21962207555770874 + ] + } + }, + "transformer.layers.3.2.to_k.bias": { + "min": -8.17552661895752, + "max": 8.776671409606934, + "mean": -0.1091664582490921, + "std": 1.6969325542449951, + "abs_mean": 0.9193365573883057, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 54.38762283325195, + "elements": 1024, + "histogram": { + "counts": [ + 7, + 3, + 8, + 10, + 6, + 8, + 21, + 40, + 111, + 518, + 172, + 46, + 15, + 4, + 11, + 13, + 2, + 2, + 2, + 1 + ], + "bin_edges": [ + -8.17552661895752, + -7.327916622161865, + -6.480306625366211, + -5.632697105407715, + -4.7850871086120605, + -3.9374771118164062, + -3.08986759185791, + -2.242257595062256, + -1.3946475982666016, + -0.5470376014709473, + 0.30057239532470703, + 1.1481819152832031, + 1.9957914352416992, + 2.8434019088745117, + 3.691011428833008, + 4.53862190246582, + 5.386231422424316, + 6.2338409423828125, + 7.081451416015625, + 7.929060935974121, + 8.776671409606934 + ] + } + }, + "transformer.layers.3.2.to_v.weight": { + "min": -0.27638494968414307, + "max": 0.23973813652992249, + "mean": 5.3197330998955294e-05, + "std": 0.03261549770832062, + "abs_mean": 0.025414835661649704, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.397850036621094, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 2, + 2, + 13, + 12, + 38, + 70, + 93, + 110, + 158, + 149, + 123, + 97, + 51, + 44, + 19, + 12, + 3, + 1, + 2 + ], + "bin_edges": [ + -0.11360769718885422, + -0.10233889520168304, + -0.09107010066509247, + -0.0798012986779213, + -0.06853249669075012, + -0.05726369470357895, + -0.04599490016698837, + -0.0347260981798172, + -0.023457296192646027, + -0.012188494205474854, + -0.0009196922183036804, + 0.010349102318286896, + 0.021617896854877472, + 0.03288670629262924, + 0.04415550082921982, + 0.05542431026697159, + 0.06669310480356216, + 0.07796189934015274, + 0.08923070877790451, + 0.10049950331449509, + 0.11176831275224686 + ] + } + }, + "transformer.layers.3.2.to_v.bias": { + "min": -0.051992662250995636, + "max": 0.03946495056152344, + "mean": 9.150505502475426e-05, + "std": 0.012954742647707462, + "abs_mean": 0.010758287273347378, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4143596589565277, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 2, + 2, + 15, + 34, + 77, + 89, + 114, + 120, + 105, + 128, + 106, + 105, + 61, + 27, + 9, + 4, + 1 + ], + "bin_edges": [ + -0.051992662250995636, + -0.04741978272795677, + -0.04284690320491791, + -0.038274019956588745, + -0.03370114043354988, + -0.029128260910511017, + -0.024555379524827003, + -0.01998249813914299, + -0.015409618616104126, + -0.010836739093065262, + -0.006263859570026398, + -0.001690976321697235, + 0.002881903201341629, + 0.007454782724380493, + 0.012027665972709656, + 0.01660054177045822, + 0.021173425018787384, + 0.025746308267116547, + 0.030319184064865112, + 0.034892067313194275, + 0.03946495056152344 + ] + } + }, + "transformer.layers.3.2.to_out.0.weight": { + "min": -0.23067787289619446, + "max": 0.23443163931369781, + "mean": -2.1657757315551862e-05, + "std": 0.029391853138804436, + "abs_mean": 0.022796550765633583, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 30.09688377380371, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 4, + 11, + 35, + 54, + 100, + 147, + 188, + 141, + 157, + 84, + 41, + 18, + 9, + 3, + 1, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.10899056494235992, + -0.09656518697738647, + -0.08413980901241302, + -0.07171443849802017, + -0.05928906053304672, + -0.04686368256807327, + -0.03443831205368042, + -0.02201293408870697, + -0.00958755612373352, + 0.002837821841239929, + 0.015263199806213379, + 0.02768857777118683, + 0.040113940834999084, + 0.052539318799972534, + 0.06496469676494598, + 0.07739007472991943, + 0.08981545269489288, + 0.10224083065986633, + 0.11466620862483978, + 0.12709158658981323, + 0.1395169496536255 + ] + } + }, + "transformer.layers.3.2.to_out.0.bias": { + "min": -0.20401200652122498, + "max": 0.10544212907552719, + "mean": -0.004023304674774408, + "std": 0.0326065756380558, + "abs_mean": 0.02599199116230011, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.050817608833313, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 7, + 15, + 58, + 89, + 166, + 170, + 191, + 149, + 87, + 44, + 17, + 3, + 2 + ], + "bin_edges": [ + -0.20401200652122498, + -0.1885392963886261, + -0.17306658625602722, + -0.15759387612342834, + -0.14212118089199066, + -0.12664847075939178, + -0.11117576062679291, + -0.09570305794477463, + -0.08023034781217575, + -0.06475764513015747, + -0.049284934997558594, + -0.03381222486495972, + -0.01833951473236084, + -0.002866804599761963, + 0.01260589063167572, + 0.028078600764274597, + 0.043551310896873474, + 0.05902400612831116, + 0.07449671626091003, + 0.08996942639350891, + 0.10544212907552719 + ] + } + }, + "transformer.layers.3.3.g": { + "min": 0.33983615040779114, + "max": 1.0106816291809082, + "mean": 0.7006407380104065, + "std": 0.09645594656467438, + "abs_mean": 0.7006407380104065, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.63176155090332, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 4, + 9, + 15, + 17, + 31, + 30, + 59, + 73, + 110, + 135, + 120, + 169, + 133, + 72, + 16, + 2, + 0, + 1, + 3 + ], + "bin_edges": [ + 0.33983615040779114, + 0.3733784258365631, + 0.4069207012653351, + 0.44046297669410706, + 0.47400525212287903, + 0.5075474977493286, + 0.5410897731781006, + 0.5746320486068726, + 0.6081743240356445, + 0.6417165994644165, + 0.6752588748931885, + 0.7088011503219604, + 0.7423434257507324, + 0.7758857011795044, + 0.8094279766082764, + 0.8429702520370483, + 0.8765125274658203, + 0.9100548028945923, + 0.9435970783233643, + 0.9771393537521362, + 1.0106816291809082 + ] + } + }, + "transformer.layers.3.4.ff.0.0.weight": { + "min": -0.5642791390419006, + "max": 0.832179069519043, + "mean": 0.00041513508767820895, + "std": 0.042302437126636505, + "abs_mean": 0.03297626972198486, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 86.63153839111328, + "elements": 4194304, + "histogram": { + "counts": [ + 5, + 4, + 13, + 24, + 42, + 64, + 116, + 144, + 143, + 136, + 109, + 80, + 53, + 30, + 16, + 7, + 5, + 6, + 1, + 2 + ], + "bin_edges": [ + -0.12684103846549988, + -0.11227414011955261, + -0.09770724177360535, + -0.08314034342765808, + -0.06857344508171082, + -0.05400654673576355, + -0.039439648389816284, + -0.02487275004386902, + -0.010305851697921753, + 0.004261046648025513, + 0.01882794499397278, + 0.033394843339920044, + 0.04796174168586731, + 0.06252864003181458, + 0.07709553837776184, + 0.0916624367237091, + 0.10622933506965637, + 0.12079623341560364, + 0.1353631317615509, + 0.14993003010749817, + 0.16449694335460663 + ] + } + }, + "transformer.layers.3.4.ff.0.0.bias": { + "min": -0.21134838461875916, + "max": 0.030589817091822624, + "mean": -0.032172758132219315, + "std": 0.026476319879293442, + "abs_mean": 0.03335808217525482, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.6665127277374268, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 1, + 3, + 2, + 2, + 4, + 8, + 15, + 29, + 34, + 42, + 92, + 120, + 123, + 186, + 155, + 94, + 63, + 21, + 5 + ], + "bin_edges": [ + -0.1719832420349121, + -0.1618545949459076, + -0.15172593295574188, + -0.14159728586673737, + -0.13146862387657166, + -0.12133997678756714, + -0.11121132969856262, + -0.10108267515897751, + -0.0909540206193924, + -0.08082536607980728, + -0.07069671154022217, + -0.06056806445121765, + -0.05043940991163254, + -0.040310755372047424, + -0.030182108283042908, + -0.020053446292877197, + -0.00992479920387268, + 0.00020384788513183594, + 0.010332509875297546, + 0.020461156964302063, + 0.030589817091822624 + ] + } + }, + "transformer.layers.3.4.ff.2.weight": { + "min": -0.7536408305168152, + "max": 0.717832088470459, + "mean": -9.409409358340781e-06, + "std": 0.03684220835566521, + "abs_mean": 0.027992695569992065, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 75.44456481933594, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 3, + 1, + 5, + 24, + 49, + 104, + 159, + 198, + 187, + 124, + 76, + 37, + 22, + 6, + 2, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.15810149908065796, + -0.14198999106884003, + -0.1258784681558609, + -0.10976696014404297, + -0.09365544468164444, + -0.07754392921924591, + -0.06143242120742798, + -0.04532090574502945, + -0.02920939028263092, + -0.013097882270812988, + 0.0030136406421661377, + 0.01912514865398407, + 0.035236656665802, + 0.05134817957878113, + 0.06745968759059906, + 0.08357121050357819, + 0.09968271851539612, + 0.11579424142837524, + 0.13190573453903198, + 0.1480172574520111, + 0.16412878036499023 + ] + } + }, + "transformer.layers.3.4.ff.2.bias": { + "min": -0.2631220519542694, + "max": 0.10570736974477768, + "mean": -0.003029324347153306, + "std": 0.028848078101873398, + "abs_mean": 0.02227037213742733, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9277658462524414, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 23, + 59, + 144, + 244, + 248, + 180, + 72, + 20, + 8, + 1 + ], + "bin_edges": [ + -0.2631220519542694, + -0.24468058347702026, + -0.22623911499977112, + -0.20779763162136078, + -0.18935616314411163, + -0.1709146946668625, + -0.15247321128845215, + -0.134031742811203, + -0.11559027433395386, + -0.09714880585670471, + -0.07870733737945557, + -0.06026585400104523, + -0.04182438552379608, + -0.023382917046546936, + -0.004941433668136597, + 0.013500034809112549, + 0.031941503286361694, + 0.05038297176361084, + 0.06882444024085999, + 0.08726590871810913, + 0.10570736974477768 + ] + } + }, + "transformer.layers.4.1.g": { + "min": 0.28446710109710693, + "max": 0.6937389373779297, + "mean": 0.49939653277397156, + "std": 0.04629269987344742, + "abs_mean": 0.49939653277397156, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 16.049135208129883, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 4, + 5, + 8, + 7, + 33, + 57, + 94, + 183, + 193, + 175, + 128, + 56, + 36, + 13, + 3, + 1, + 0, + 2 + ], + "bin_edges": [ + 0.28446710109710693, + 0.3049306869506836, + 0.32539427280426025, + 0.3458578586578369, + 0.36632147431373596, + 0.3867850601673126, + 0.4072486460208893, + 0.42771226167678833, + 0.448175847530365, + 0.46863943338394165, + 0.4891030192375183, + 0.509566605091095, + 0.5300301909446716, + 0.5504937767982483, + 0.5709574222564697, + 0.5914210081100464, + 0.611884593963623, + 0.6323481798171997, + 0.6528117656707764, + 0.673275351524353, + 0.6937389373779297 + ] + } + }, + "transformer.layers.4.2.to_q.weight": { + "min": -0.27887189388275146, + "max": 0.23408503830432892, + "mean": -0.00011133109364891425, + "std": 0.03876320272684097, + "abs_mean": 0.030566837638616562, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.69315719604492, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 2, + 4, + 8, + 27, + 51, + 107, + 157, + 166, + 166, + 137, + 95, + 47, + 18, + 8, + 6 + ], + "bin_edges": [ + -0.22455070912837982, + -0.2071923017501831, + -0.1898338794708252, + -0.17247545719146729, + -0.15511704981327057, + -0.13775864243507385, + -0.12040022015571594, + -0.10304180532693863, + -0.08568339049816132, + -0.0683249682188034, + -0.05096656084060669, + -0.03360815346240997, + -0.016249731183052063, + 0.0011086910963058472, + 0.018467098474502563, + 0.03582550585269928, + 0.05318392813205719, + 0.0705423504114151, + 0.08790077269077301, + 0.10525916516780853, + 0.12261758744716644 + ] + } + }, + "transformer.layers.4.2.to_q.bias": { + "min": -0.15426576137542725, + "max": 0.1266399770975113, + "mean": -0.0022300498094409704, + "std": 0.0333842970430851, + "abs_mean": 0.024381492286920547, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.070157766342163, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 7, + 7, + 18, + 21, + 40, + 73, + 151, + 224, + 190, + 113, + 81, + 37, + 13, + 10, + 5, + 5, + 2 + ], + "bin_edges": [ + -0.15426576137542725, + -0.14022047817707062, + -0.126175194978714, + -0.11212990432977676, + -0.09808461368083954, + -0.08403933048248291, + -0.06999404728412628, + -0.055948756635189056, + -0.04190347343683243, + -0.027858182787895203, + -0.013812899589538574, + 0.0002323836088180542, + 0.014277666807174683, + 0.02832295000553131, + 0.04236824810504913, + 0.05641353130340576, + 0.07045881450176239, + 0.08450409770011902, + 0.09854939579963684, + 0.11259466409683228, + 0.1266399770975113 + ] + } + }, + "transformer.layers.4.2.to_k.weight": { + "min": -0.41348376870155334, + "max": 0.6593844294548035, + "mean": -1.978595719265286e-05, + "std": 0.039100244641304016, + "abs_mean": 0.030760983005166054, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.038177490234375, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 2, + 3, + 6, + 21, + 30, + 55, + 70, + 122, + 150, + 140, + 132, + 110, + 73, + 39, + 22, + 11, + 8, + 4 + ], + "bin_edges": [ + -0.14758364856243134, + -0.1345057636499405, + -0.12142786383628845, + -0.10834997892379761, + -0.09527208656072617, + -0.08219419419765472, + -0.06911630928516388, + -0.05603841692209244, + -0.042960524559020996, + -0.029882632195949554, + -0.016804739832878113, + -0.003726854920387268, + 0.009351029992103577, + 0.022428929805755615, + 0.03550681471824646, + 0.0485847145318985, + 0.06166259944438934, + 0.07474048435688019, + 0.08781838417053223, + 0.10089626908302307, + 0.11397416889667511 + ] + } + }, + "transformer.layers.4.2.to_k.bias": { + "min": -4.232041358947754, + "max": 4.715827465057373, + "mean": -0.020488303154706955, + "std": 1.0068391561508179, + "abs_mean": 0.6846990585327148, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.20978927612305, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 5, + 4, + 7, + 14, + 20, + 55, + 88, + 179, + 306, + 165, + 63, + 35, + 23, + 16, + 5, + 8, + 2, + 1, + 2 + ], + "bin_edges": [ + -4.232041358947754, + -3.7846479415893555, + -3.337254524230957, + -2.8898611068725586, + -2.44246768951416, + -1.9950742721557617, + -1.5476808547973633, + -1.1002874374389648, + -0.6528940200805664, + -0.20550060272216797, + 0.24189281463623047, + 0.6892862319946289, + 1.1366796493530273, + 1.5840730667114258, + 2.031466484069824, + 2.4788599014282227, + 2.926253318786621, + 3.3736467361450195, + 3.821040153503418, + 4.268433570861816, + 4.715827465057373 + ] + } + }, + "transformer.layers.4.2.to_v.weight": { + "min": -0.24481239914894104, + "max": 0.2074868232011795, + "mean": 4.380439349915832e-05, + "std": 0.03396626561880112, + "abs_mean": 0.026826273649930954, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 34.780982971191406, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 12, + 17, + 44, + 59, + 117, + 165, + 159, + 138, + 130, + 76, + 33, + 17, + 18, + 10, + 1, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.11701656132936478, + -0.10330986231565475, + -0.08960317075252533, + -0.07589647173881531, + -0.062189772725105286, + -0.048483073711395264, + -0.03477638214826584, + -0.021069683134555817, + -0.007362984120845795, + 0.006343714892864227, + 0.02005041390657425, + 0.03375711292028427, + 0.0474637970328331, + 0.06117049604654312, + 0.07487719506025314, + 0.08858389407396317, + 0.10229059308767319, + 0.11599729210138321, + 0.12970399856567383, + 0.14341068267822266, + 0.15711738169193268 + ] + } + }, + "transformer.layers.4.2.to_v.bias": { + "min": -0.03449943661689758, + "max": 0.044728994369506836, + "mean": -1.8020247807726264e-05, + "std": 0.012624197639524937, + "abs_mean": 0.010656064376235008, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.40377742052078247, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 11, + 48, + 74, + 99, + 104, + 94, + 97, + 102, + 97, + 86, + 75, + 70, + 27, + 7, + 2, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.03449943661689758, + -0.030538015067577362, + -0.02657659351825714, + -0.02261517196893692, + -0.0186537504196167, + -0.014692328870296478, + -0.010730907320976257, + -0.006769485771656036, + -0.0028080642223358154, + 0.0011533573269844055, + 0.0051147788763046265, + 0.009076200425624847, + 0.013037621974945068, + 0.01699904352426529, + 0.02096046507358551, + 0.02492188662290573, + 0.028883308172225952, + 0.03284472972154617, + 0.036806151270866394, + 0.040767572820186615, + 0.044728994369506836 + ] + } + }, + "transformer.layers.4.2.to_out.0.weight": { + "min": -0.20050014555454254, + "max": 0.20566238462924957, + "mean": -2.9678063583560288e-05, + "std": 0.03102380409836769, + "abs_mean": 0.024439673870801926, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 31.76807975769043, + "elements": 1048576, + "histogram": { + "counts": [ + 5, + 13, + 17, + 49, + 81, + 119, + 125, + 139, + 161, + 113, + 76, + 46, + 30, + 8, + 9, + 3, + 4, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.08439754694700241, + -0.07326537370681763, + -0.06213320419192314, + -0.051001034677028656, + -0.03986886143684387, + -0.028736688196659088, + -0.0176045224070549, + -0.006472349166870117, + 0.004659824073314667, + 0.01579199731349945, + 0.026924170553684235, + 0.03805633634328842, + 0.04918850213289261, + 0.06032068282365799, + 0.07145284861326218, + 0.08258502930402756, + 0.09371719509363174, + 0.10484936088323593, + 0.11598154157400131, + 0.1271136999130249, + 0.13824589550495148 + ] + } + }, + "transformer.layers.4.2.to_out.0.bias": { + "min": -0.19964830577373505, + "max": 0.11326169967651367, + "mean": -0.00291792256757617, + "std": 0.03448895364999771, + "abs_mean": 0.02703409641981125, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.107052206993103, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 1, + 4, + 16, + 25, + 64, + 119, + 165, + 188, + 162, + 123, + 73, + 27, + 27, + 3, + 2 + ], + "bin_edges": [ + -0.19964830577373505, + -0.1840028017759323, + -0.16835731267929077, + -0.15271180868148804, + -0.1370663046836853, + -0.12142080068588257, + -0.10577530413866043, + -0.0901298075914383, + -0.07448430359363556, + -0.058838799595832825, + -0.04319329559803009, + -0.02754780650138855, + -0.011902302503585815, + 0.003743201494216919, + 0.01938869059085846, + 0.035034194588661194, + 0.05067969858646393, + 0.06632520258426666, + 0.0819707065820694, + 0.09761621057987213, + 0.11326169967651367 + ] + } + }, + "transformer.layers.4.3.g": { + "min": 0.36708179116249084, + "max": 1.0548574924468994, + "mean": 0.6704699397087097, + "std": 0.06616173684597015, + "abs_mean": 0.6704699397087097, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.559146881103516, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 8, + 5, + 8, + 16, + 41, + 68, + 101, + 219, + 272, + 189, + 59, + 8, + 1, + 0, + 1, + 0, + 1, + 0, + 1 + ], + "bin_edges": [ + 0.36708179116249084, + 0.4014705717563629, + 0.4358593821525574, + 0.47024816274642944, + 0.5046369433403015, + 0.5390257239341736, + 0.5734145045280457, + 0.6078033447265625, + 0.6421921253204346, + 0.6765809059143066, + 0.7109696865081787, + 0.7453584671020508, + 0.7797472476959229, + 0.8141360282897949, + 0.848524808883667, + 0.8829135894775391, + 0.9173023700714111, + 0.9516911506652832, + 0.9860799312591553, + 1.0204687118530273, + 1.0548574924468994 + ] + } + }, + "transformer.layers.4.4.ff.0.0.weight": { + "min": -0.397816002368927, + "max": 0.5021188855171204, + "mean": -3.856579860439524e-05, + "std": 0.041137274354696274, + "abs_mean": 0.03244972229003906, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 84.24102783203125, + "elements": 4194304, + "histogram": { + "counts": [ + 3, + 3, + 14, + 16, + 38, + 63, + 98, + 132, + 140, + 160, + 116, + 89, + 69, + 29, + 17, + 5, + 5, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.13683238625526428, + -0.12173408269882202, + -0.10663577914237976, + -0.0915374681353569, + -0.07643916457891464, + -0.06134086102247238, + -0.046242550015449524, + -0.031144246459007263, + -0.016045942902565002, + -0.0009476393461227417, + 0.014150664210319519, + 0.02924896776676178, + 0.044347286224365234, + 0.059445589780807495, + 0.07454389333724976, + 0.08964219689369202, + 0.10474050045013428, + 0.11983880400657654, + 0.1349371075630188, + 0.15003541111946106, + 0.16513371467590332 + ] + } + }, + "transformer.layers.4.4.ff.0.0.bias": { + "min": -0.12784262001514435, + "max": 0.02675941213965416, + "mean": -0.030531462281942368, + "std": 0.02184327319264412, + "abs_mean": 0.03135973587632179, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.402501106262207, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 1, + 3, + 7, + 7, + 9, + 17, + 21, + 46, + 66, + 82, + 111, + 106, + 135, + 128, + 96, + 85, + 47, + 21, + 11 + ], + "bin_edges": [ + -0.11856583505868912, + -0.11168412119150162, + -0.10480239987373352, + -0.09792068600654602, + -0.09103897213935852, + -0.08415725827217102, + -0.07727553695440292, + -0.07039382308721542, + -0.06351210176944733, + -0.05663038790225983, + -0.04974867403507233, + -0.04286696016788483, + -0.03598523885011673, + -0.02910352498292923, + -0.02222181111574173, + -0.015340089797973633, + -0.008458375930786133, + -0.0015766620635986328, + 0.005305059254169464, + 0.012186773121356964, + 0.019068485125899315 + ] + } + }, + "transformer.layers.4.4.ff.2.weight": { + "min": -0.4485797882080078, + "max": 0.43235480785369873, + "mean": 8.378911297768354e-05, + "std": 0.034896139055490494, + "abs_mean": 0.0270802304148674, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 71.46002197265625, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 4, + 3, + 15, + 26, + 50, + 70, + 104, + 124, + 153, + 140, + 116, + 64, + 65, + 31, + 14, + 13, + 3, + 2, + 1 + ], + "bin_edges": [ + -0.11277943849563599, + -0.10100102424621582, + -0.08922261744737625, + -0.07744420319795609, + -0.06566579639911652, + -0.05388738214969635, + -0.042108967900276184, + -0.030330561101436615, + -0.01855214685201645, + -0.006773732602596283, + 0.005004674196243286, + 0.016783088445663452, + 0.028561502695083618, + 0.040339916944503784, + 0.052118316292762756, + 0.06389673054218292, + 0.07567514479160309, + 0.08745355904102325, + 0.09923197329044342, + 0.11101037263870239, + 0.12278879433870316 + ] + } + }, + "transformer.layers.4.4.ff.2.bias": { + "min": -0.26721277832984924, + "max": 0.07248232513666153, + "mean": -0.0011095060035586357, + "std": 0.023109637200832367, + "abs_mean": 0.01726192981004715, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.7399994730949402, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 4, + 20, + 78, + 179, + 306, + 263, + 116, + 32, + 1 + ], + "bin_edges": [ + -0.26721277832984924, + -0.25022801756858826, + -0.23324327170848846, + -0.21625851094722748, + -0.19927376508712769, + -0.1822890043258667, + -0.1653042435646057, + -0.14831948280334473, + -0.13133473694324493, + -0.11434997618198395, + -0.09736523032188416, + -0.08038046956062317, + -0.06339570879936218, + -0.04641096293926239, + -0.029426202178001404, + -0.012441456317901611, + 0.004543304443359375, + 0.02152806520462036, + 0.03851282596588135, + 0.055497556924819946, + 0.07248232513666153 + ] + } + }, + "transformer.layers.5.1.g": { + "min": 0.287344753742218, + "max": 0.6839542388916016, + "mean": 0.5244242548942566, + "std": 0.047291453927755356, + "abs_mean": 0.5244242548942566, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 16.849607467651367, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 2, + 2, + 5, + 6, + 12, + 17, + 34, + 94, + 152, + 158, + 183, + 152, + 101, + 53, + 16, + 3, + 5, + 3 + ], + "bin_edges": [ + 0.287344753742218, + 0.3071752190589905, + 0.3270057141780853, + 0.3468361794948578, + 0.36666664481163025, + 0.3864971399307251, + 0.40632760524749756, + 0.42615807056427, + 0.4459885358810425, + 0.46581903100013733, + 0.4856494963169098, + 0.5054799914360046, + 0.5253104567527771, + 0.5451409220695496, + 0.564971387386322, + 0.5848019123077393, + 0.6046323776245117, + 0.6244628429412842, + 0.6442933082580566, + 0.6641237735748291, + 0.6839542388916016 + ] + } + }, + "transformer.layers.5.2.to_q.weight": { + "min": -0.22201856970787048, + "max": 0.22311273217201233, + "mean": 1.577789407747332e-05, + "std": 0.038952890783548355, + "abs_mean": 0.030697684735059738, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.88728713989258, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 6, + 15, + 28, + 34, + 68, + 97, + 121, + 126, + 155, + 118, + 90, + 67, + 39, + 19, + 6, + 4, + 1, + 0, + 4 + ], + "bin_edges": [ + -0.12169166654348373, + -0.10821932554244995, + -0.09474697709083557, + -0.08127463608980179, + -0.067802295088768, + -0.054329946637153625, + -0.04085760563611984, + -0.027385257184505463, + -0.01391291618347168, + -0.00044057518243789673, + 0.013031773269176483, + 0.026504121720790863, + 0.03997645527124405, + 0.05344880372285843, + 0.06692115217447281, + 0.080393485724926, + 0.09386583417654037, + 0.10733818262815475, + 0.12081051617860794, + 0.13428285717964172, + 0.1477552056312561 + ] + } + }, + "transformer.layers.5.2.to_q.bias": { + "min": -0.13627174496650696, + "max": 0.1090594157576561, + "mean": 0.00023713918926659971, + "std": 0.029215561226010323, + "abs_mean": 0.020363088697195053, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9344722032546997, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 2, + 10, + 8, + 4, + 13, + 30, + 50, + 124, + 221, + 230, + 156, + 75, + 31, + 13, + 8, + 8, + 10, + 5 + ], + "bin_edges": [ + -0.13627174496650696, + -0.12400518357753754, + -0.11173862963914871, + -0.09947206825017929, + -0.08720551431179047, + -0.07493895292282104, + -0.06267239153385162, + -0.0504058375954628, + -0.03813927620649338, + -0.025872714817523956, + -0.013606160879135132, + -0.0013395994901657104, + 0.010926961898803711, + 0.023193523287773132, + 0.03546006977558136, + 0.04772663116455078, + 0.0599931925535202, + 0.07225975394248962, + 0.08452631533145905, + 0.09679286181926727, + 0.1090594157576561 + ] + } + }, + "transformer.layers.5.2.to_k.weight": { + "min": -0.3747805953025818, + "max": 0.43678468465805054, + "mean": -9.573410352459177e-06, + "std": 0.03928905352950096, + "abs_mean": 0.030855529010295868, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.23149871826172, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 2, + 3, + 6, + 15, + 20, + 41, + 58, + 97, + 130, + 151, + 138, + 107, + 94, + 60, + 43, + 14, + 12, + 3, + 5 + ], + "bin_edges": [ + -0.1474566012620926, + -0.133971706032753, + -0.1204868033528328, + -0.1070019081234932, + -0.093517005443573, + -0.0800321102142334, + -0.0665472149848938, + -0.0530623123049736, + -0.039577417075634, + -0.026092521846294403, + -0.012607619166374207, + 0.0008772760629653931, + 0.014362171292304993, + 0.027847066521644592, + 0.041331976652145386, + 0.054816871881484985, + 0.06830176711082458, + 0.08178666234016418, + 0.09527155756950378, + 0.10875646770000458, + 0.12224137037992477 + ] + } + }, + "transformer.layers.5.2.to_k.bias": { + "min": -3.840266227722168, + "max": 4.992228984832764, + "mean": 0.009751387871801853, + "std": 0.8444771766662598, + "abs_mean": 0.5379955172538757, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 27.01187515258789, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 5, + 11, + 13, + 33, + 65, + 97, + 424, + 180, + 92, + 33, + 17, + 8, + 9, + 6, + 0, + 0, + 1, + 1 + ], + "bin_edges": [ + -3.840266227722168, + -3.398641586303711, + -2.957016706466675, + -2.5153920650482178, + -2.0737671852111816, + -1.6321425437927246, + -1.1905179023742676, + -0.7488930225372314, + -0.3072683811187744, + 0.13435626029968262, + 0.5759811401367188, + 1.0176057815551758, + 1.4592304229736328, + 1.9008550643920898, + 2.342480182647705, + 2.784104824066162, + 3.225729465484619, + 3.667354106903076, + 4.108978748321533, + 4.550603866577148, + 4.992228984832764 + ] + } + }, + "transformer.layers.5.2.to_v.weight": { + "min": -0.22314536571502686, + "max": 0.21986283361911774, + "mean": -2.0974857761757448e-07, + "std": 0.034413520246744156, + "abs_mean": 0.027191974222660065, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.238975524902344, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 0, + 0, + 8, + 13, + 28, + 46, + 96, + 111, + 135, + 134, + 130, + 109, + 73, + 49, + 26, + 24, + 10, + 2, + 4 + ], + "bin_edges": [ + -0.12520456314086914, + -0.11339918524026871, + -0.10159379988908768, + -0.08978842198848724, + -0.07798303663730621, + -0.06617765873670578, + -0.05437228083610535, + -0.042566895484924316, + -0.030761517584323883, + -0.01895613968372345, + -0.007150754332542419, + 0.004654631018638611, + 0.016460001468658447, + 0.028265386819839478, + 0.04007077217102051, + 0.051876142621040344, + 0.06368152797222137, + 0.0754869133234024, + 0.08729228377342224, + 0.09909766912460327, + 0.1109030619263649 + ] + } + }, + "transformer.layers.5.2.to_v.bias": { + "min": -0.043581560254096985, + "max": 0.03578736633062363, + "mean": -0.00025875651044771075, + "std": 0.012076529674232006, + "abs_mean": 0.01006263680756092, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.3863489329814911, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 13, + 35, + 55, + 77, + 110, + 126, + 102, + 104, + 94, + 104, + 87, + 47, + 29, + 11, + 2, + 2 + ], + "bin_edges": [ + -0.043581560254096985, + -0.039613112807273865, + -0.035644665360450745, + -0.03167622163891792, + -0.027707774192094803, + -0.023739326745271683, + -0.019770881161093712, + -0.01580243557691574, + -0.01183398813009262, + -0.0078655406832695, + -0.0038970932364463806, + 7.135048508644104e-05, + 0.004039797931909561, + 0.008008245378732681, + 0.011976689100265503, + 0.015945136547088623, + 0.019913583993911743, + 0.023882031440734863, + 0.027850478887557983, + 0.031818926334381104, + 0.03578736633062363 + ] + } + }, + "transformer.layers.5.2.to_out.0.weight": { + "min": -0.21286383271217346, + "max": 0.18843913078308105, + "mean": -1.6783855244284496e-05, + "std": 0.03154028207063675, + "abs_mean": 0.0248585008084774, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.29688262939453, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 3, + 7, + 8, + 30, + 33, + 60, + 86, + 96, + 139, + 134, + 119, + 113, + 71, + 33, + 44, + 7, + 7, + 7, + 2 + ], + "bin_edges": [ + -0.1055210754275322, + -0.09519147127866745, + -0.0848618745803833, + -0.07453227043151855, + -0.06420266628265381, + -0.05387306213378906, + -0.043543461710214615, + -0.03321386128664017, + -0.02288425713777542, + -0.012554652988910675, + -0.002225048840045929, + 0.00810454785823822, + 0.018434152007102966, + 0.028763748705387115, + 0.03909335285425186, + 0.04942295700311661, + 0.059752561151981354, + 0.0700821653008461, + 0.08041176944971085, + 0.09074137359857559, + 0.10107096284627914 + ] + } + }, + "transformer.layers.5.2.to_out.0.bias": { + "min": -0.18049854040145874, + "max": 0.12063688784837723, + "mean": -0.0024107899516820908, + "std": 0.04124762490391731, + "abs_mean": 0.03318789601325989, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.321532964706421, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 3, + 16, + 14, + 49, + 73, + 111, + 118, + 130, + 152, + 118, + 89, + 61, + 34, + 20, + 8, + 3 + ], + "bin_edges": [ + -0.18049854040145874, + -0.16544176638126373, + -0.15038499236106873, + -0.1353282332420349, + -0.1202714592218399, + -0.1052146852016449, + -0.09015791118144989, + -0.07510114461183548, + -0.06004437059164047, + -0.04498760402202606, + -0.029930830001831055, + -0.014874055981636047, + 0.00018271803855895996, + 0.015239492058753967, + 0.03029625117778778, + 0.04535302519798279, + 0.060409799218177795, + 0.0754665732383728, + 0.09052333235740662, + 0.10558012127876282, + 0.12063688784837723 + ] + } + }, + "transformer.layers.5.3.g": { + "min": 0.4223836064338684, + "max": 0.9401367902755737, + "mean": 0.6626168489456177, + "std": 0.05654710531234741, + "abs_mean": 0.6626168489456177, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.280733108520508, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 9, + 2, + 13, + 14, + 35, + 48, + 97, + 157, + 217, + 232, + 101, + 56, + 9, + 2, + 2, + 0, + 0, + 1, + 1 + ], + "bin_edges": [ + 0.4223836064338684, + 0.4482712745666504, + 0.47415891289711, + 0.5000466108322144, + 0.5259342193603516, + 0.5518218874931335, + 0.5777095556259155, + 0.6035972237586975, + 0.6294848918914795, + 0.6553725004196167, + 0.6812602281570435, + 0.7071478366851807, + 0.7330355048179626, + 0.7589231729507446, + 0.7848108410835266, + 0.8106985092163086, + 0.8365861177444458, + 0.8624738454818726, + 0.8883614540100098, + 0.9142491221427917, + 0.9401367902755737 + ] + } + }, + "transformer.layers.5.4.ff.0.0.weight": { + "min": -0.3711914122104645, + "max": 0.4754900634288788, + "mean": -8.231064566643909e-05, + "std": 0.04089626669883728, + "abs_mean": 0.03231760859489441, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.74752807617188, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 1, + 0, + 8, + 14, + 28, + 49, + 75, + 109, + 152, + 134, + 135, + 118, + 79, + 46, + 32, + 11, + 2, + 3, + 3 + ], + "bin_edges": [ + -0.15829983353614807, + -0.14343194663524628, + -0.12856405973434448, + -0.11369617283344269, + -0.0988282859325409, + -0.0839603990316391, + -0.0690925121307373, + -0.05422462522983551, + -0.039356738328933716, + -0.02448885142803192, + -0.009620964527130127, + 0.0052469223737716675, + 0.020114809274673462, + 0.034982696175575256, + 0.04985058307647705, + 0.06471846997737885, + 0.07958635687828064, + 0.09445425868034363, + 0.10932213068008423, + 0.12419000267982483, + 0.139057919383049 + ] + } + }, + "transformer.layers.5.4.ff.0.0.bias": { + "min": -0.2078404426574707, + "max": 0.02713177166879177, + "mean": -0.030231105163693428, + "std": 0.021318932995200157, + "abs_mean": 0.03098458983004093, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.367399215698242, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 3, + 13, + 25, + 44, + 93, + 194, + 226, + 215, + 112, + 58, + 14 + ], + "bin_edges": [ + -0.2078404426574707, + -0.1963563710451126, + -0.18487228453159332, + -0.17338821291923523, + -0.16190414130687714, + -0.15042006969451904, + -0.13893598318099976, + -0.12745191156864166, + -0.11596783250570297, + -0.10448375344276428, + -0.09299968183040619, + -0.0815156102180481, + -0.07003152370452881, + -0.058547452092170715, + -0.04706338047981262, + -0.035579293966293335, + -0.02409522235393524, + -0.012611150741577148, + -0.0011270642280578613, + 0.010357007384300232, + 0.021841073408722878 + ] + } + }, + "transformer.layers.5.4.ff.2.weight": { + "min": -0.3397354185581207, + "max": 0.7327741384506226, + "mean": 8.48791969474405e-05, + "std": 0.03477150574326515, + "abs_mean": 0.027075331658124924, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 71.20501708984375, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 4, + 20, + 33, + 63, + 94, + 120, + 147, + 144, + 139, + 98, + 56, + 42, + 21, + 8, + 2, + 5, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.10304070264101028, + -0.09046163409948349, + -0.07788257300853729, + -0.0653035044670105, + -0.052724439650774, + -0.040145374834537506, + -0.02756630629301071, + -0.014987245202064514, + -0.0024081766605377197, + 0.010170891880989075, + 0.022749952971935272, + 0.03532902151346207, + 0.04790809005498886, + 0.060487158596515656, + 0.07306621223688126, + 0.08564528077840805, + 0.09822434931993484, + 0.11080341786146164, + 0.12338248640298843, + 0.13596153259277344, + 0.14854060113430023 + ] + } + }, + "transformer.layers.5.4.ff.2.bias": { + "min": -0.23985552787780762, + "max": 0.050368692725896835, + "mean": -0.0011948456522077322, + "std": 0.02045026607811451, + "abs_mean": 0.015394063666462898, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.655205488204956, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 4, + 23, + 99, + 217, + 301, + 233, + 103, + 18 + ], + "bin_edges": [ + -0.23985552787780762, + -0.22534431517124176, + -0.2108331024646759, + -0.19632190465927124, + -0.1818106770515442, + -0.16729947924613953, + -0.15278826653957367, + -0.1382770538330078, + -0.12376584112644196, + -0.1092546284198761, + -0.09474341571331024, + -0.08023220300674438, + -0.06572100520133972, + -0.051209792494773865, + -0.03669857978820801, + -0.02218736708164215, + -0.007676154375076294, + 0.006835058331489563, + 0.02134627103805542, + 0.03585746884346008, + 0.050368692725896835 + ] + } + }, + "transformer.layers.6.1.g": { + "min": 0.3060871660709381, + "max": 0.6523372530937195, + "mean": 0.5249941945075989, + "std": 0.04590437561273575, + "abs_mean": 0.5249941945075989, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 16.863849639892578, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 2, + 6, + 15, + 12, + 18, + 31, + 54, + 100, + 109, + 158, + 169, + 140, + 118, + 38, + 20, + 5, + 2 + ], + "bin_edges": [ + 0.3060871660709381, + 0.3233996629714966, + 0.34071218967437744, + 0.3580246865749359, + 0.3753371834754944, + 0.39264968037605286, + 0.40996217727661133, + 0.4272747039794922, + 0.44458720088005066, + 0.46189969778060913, + 0.47921222448349, + 0.49652472138404846, + 0.5138372182846069, + 0.5311497449874878, + 0.5484622120857239, + 0.5657747387886047, + 0.5830872058868408, + 0.6003997325897217, + 0.6177122592926025, + 0.6350247859954834, + 0.6523372530937195 + ] + } + }, + "transformer.layers.6.2.to_q.weight": { + "min": -0.30396750569343567, + "max": 0.2171545922756195, + "mean": 7.000747427809983e-05, + "std": 0.03949857875704765, + "abs_mean": 0.031311385333538055, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.446109771728516, + "elements": 1048576, + "histogram": { + "counts": [ + 4, + 5, + 10, + 15, + 33, + 42, + 67, + 99, + 94, + 119, + 120, + 112, + 103, + 83, + 35, + 28, + 14, + 10, + 3, + 4 + ], + "bin_edges": [ + -0.12200530618429184, + -0.10982391238212585, + -0.09764251112937927, + -0.08546111732721329, + -0.0732797235250473, + -0.06109832227230072, + -0.048916928470134735, + -0.03673552721738815, + -0.024554133415222168, + -0.012372739613056183, + -0.00019133836030960083, + 0.011990062892436981, + 0.02417144924402237, + 0.03635285049676895, + 0.04853425174951553, + 0.06071563810110092, + 0.0728970393538475, + 0.08507844060659409, + 0.09725982695817947, + 0.10944122821092606, + 0.12162262946367264 + ] + } + }, + "transformer.layers.6.2.to_q.bias": { + "min": -0.14921154081821442, + "max": 0.1312280148267746, + "mean": 0.00034826344926841557, + "std": 0.030445020645856857, + "abs_mean": 0.020625557750463486, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9738286137580872, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 1, + 7, + 8, + 9, + 12, + 31, + 64, + 187, + 255, + 231, + 125, + 23, + 15, + 5, + 6, + 7, + 5, + 5 + ], + "bin_edges": [ + -0.14921154081821442, + -0.13518956303596497, + -0.12116758525371552, + -0.10714560747146606, + -0.09312362968921661, + -0.07910165190696716, + -0.06507967412471771, + -0.05105769634246826, + -0.03703571856021881, + -0.02301374077796936, + -0.00899176299571991, + 0.005030214786529541, + 0.01905219256877899, + 0.03307417035102844, + 0.04709614813327789, + 0.061118125915527344, + 0.0751401036977768, + 0.08916208148002625, + 0.1031840592622757, + 0.11720602214336395, + 0.1312280148267746 + ] + } + }, + "transformer.layers.6.2.to_k.weight": { + "min": -0.2569451630115509, + "max": 0.20191657543182373, + "mean": 3.105865835095756e-05, + "std": 0.03948771581053734, + "abs_mean": 0.03128843009471893, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.43494415283203, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 3, + 17, + 24, + 58, + 109, + 156, + 169, + 172, + 122, + 92, + 39, + 22, + 12, + 2, + 0, + 2 + ], + "bin_edges": [ + -0.1837465465068817, + -0.1667291522026062, + -0.1497117578983307, + -0.13269434869289398, + -0.11567695438861847, + -0.09865956008434296, + -0.08164215832948685, + -0.06462475657463074, + -0.047607362270355225, + -0.030589967966079712, + -0.0135725736618042, + 0.0034448355436325073, + 0.02046222984790802, + 0.03747962415218353, + 0.05449703335762024, + 0.07151442766189575, + 0.08853182196617126, + 0.10554921627044678, + 0.12256661057472229, + 0.1395840048789978, + 0.15660138428211212 + ] + } + }, + "transformer.layers.6.2.to_k.bias": { + "min": -2.332984685897827, + "max": 2.372544527053833, + "mean": -0.026222502812743187, + "std": 0.44942858815193176, + "abs_mean": 0.3095816969871521, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 14.399161338806152, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 2, + 1, + 5, + 7, + 12, + 19, + 72, + 144, + 274, + 271, + 108, + 47, + 17, + 6, + 3, + 6, + 0, + 2, + 1 + ], + "bin_edges": [ + -2.332984685897827, + -2.097708225250244, + -1.8624317646026611, + -1.6271553039550781, + -1.3918788433074951, + -1.156602382659912, + -0.9213259220123291, + -0.6860494613647461, + -0.4507730007171631, + -0.21549654006958008, + 0.01977992057800293, + 0.25505638122558594, + 0.49033284187316895, + 0.725609302520752, + 0.960885763168335, + 1.196162223815918, + 1.431438684463501, + 1.666715145111084, + 1.901991605758667, + 2.137268304824829, + 2.372544527053833 + ] + } + }, + "transformer.layers.6.2.to_v.weight": { + "min": -0.1888340413570404, + "max": 0.21024198830127716, + "mean": 3.7197845813352615e-05, + "std": 0.03479824960231781, + "abs_mean": 0.02761632390320301, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.63287353515625, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 0, + 3, + 12, + 19, + 45, + 56, + 85, + 120, + 134, + 144, + 115, + 113, + 52, + 61, + 21, + 11, + 2, + 2, + 3 + ], + "bin_edges": [ + -0.12260862439870834, + -0.11051894724369049, + -0.09842926263809204, + -0.08633958548307419, + -0.07424990832805634, + -0.062160223722457886, + -0.05007054656744003, + -0.03798086196184158, + -0.02589118480682373, + -0.013801507651805878, + -0.001711823046207428, + 0.010377861559391022, + 0.022467531263828278, + 0.03455721586942673, + 0.04664690047502518, + 0.05873657017946243, + 0.07082625478506088, + 0.08291593939065933, + 0.09500560909509659, + 0.10709529370069504, + 0.11918498575687408 + ] + } + }, + "transformer.layers.6.2.to_v.bias": { + "min": -0.031675707548856735, + "max": 0.035443130880594254, + "mean": -0.00020022659737151116, + "std": 0.012285580858588219, + "abs_mean": 0.010282876901328564, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.3929988145828247, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 11, + 19, + 38, + 47, + 88, + 91, + 82, + 97, + 72, + 91, + 97, + 89, + 55, + 58, + 35, + 16, + 4, + 4, + 2 + ], + "bin_edges": [ + -0.031675707548856735, + -0.028319764882326126, + -0.024963824078440666, + -0.021607881411910057, + -0.018251940608024597, + -0.014895997941493988, + -0.011540055274963379, + -0.008184114471077919, + -0.00482817180454731, + -0.0014722291380167007, + 0.0018837116658687592, + 0.005239654332399368, + 0.008595596998929977, + 0.011951539665460587, + 0.015307478606700897, + 0.018663421273231506, + 0.022019363939762115, + 0.025375306606292725, + 0.028731249272823334, + 0.032087188214063644, + 0.035443130880594254 + ] + } + }, + "transformer.layers.6.2.to_out.0.weight": { + "min": -0.18818390369415283, + "max": 0.17026524245738983, + "mean": -6.799850234529004e-05, + "std": 0.032174814492464066, + "abs_mean": 0.02552211657166481, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.94668960571289, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 5, + 4, + 12, + 13, + 32, + 56, + 87, + 113, + 150, + 151, + 136, + 94, + 54, + 59, + 17, + 9, + 3, + 4 + ], + "bin_edges": [ + -0.12617486715316772, + -0.1149633452296257, + -0.10375183075666428, + -0.09254030883312225, + -0.08132879436016083, + -0.0701172724366188, + -0.05890575051307678, + -0.047694236040115356, + -0.036482714116573334, + -0.02527119219303131, + -0.014059677720069885, + -0.0028481557965278625, + 0.00836336612701416, + 0.019574880599975586, + 0.03078639507293701, + 0.04199792444705963, + 0.05320943892002106, + 0.06442095339298248, + 0.0756324827671051, + 0.08684399724006653, + 0.09805550426244736 + ] + } + }, + "transformer.layers.6.2.to_out.0.bias": { + "min": -0.13918116688728333, + "max": 0.13709498941898346, + "mean": -0.0025172303430736065, + "std": 0.05128452926874161, + "abs_mean": 0.04145807772874832, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.642280101776123, + "elements": 1024, + "histogram": { + "counts": [ + 6, + 6, + 17, + 27, + 42, + 53, + 88, + 81, + 94, + 104, + 113, + 80, + 71, + 69, + 51, + 47, + 21, + 18, + 10, + 2 + ], + "bin_edges": [ + -0.13918116688728333, + -0.12536735832691193, + -0.11155354976654053, + -0.09773974120616913, + -0.08392593264579773, + -0.07011212408542633, + -0.05629831552505493, + -0.04248450696468353, + -0.028670698404312134, + -0.014856889843940735, + -0.001043081283569336, + 0.012770727276802063, + 0.026584535837173462, + 0.04039834439754486, + 0.05421215295791626, + 0.06802596151828766, + 0.08183977007865906, + 0.09565357863903046, + 0.10946738719940186, + 0.12328121066093445, + 0.13709498941898346 + ] + } + }, + "transformer.layers.6.3.g": { + "min": 0.4672186076641083, + "max": 0.9546743631362915, + "mean": 0.6688124537467957, + "std": 0.05250026285648346, + "abs_mean": 0.6688124537467957, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.467771530151367, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 7, + 12, + 16, + 21, + 61, + 99, + 194, + 227, + 191, + 98, + 43, + 16, + 5, + 2, + 2, + 0, + 0, + 2, + 1 + ], + "bin_edges": [ + 0.4672186076641083, + 0.4915913939476013, + 0.5159642100334167, + 0.5403369665145874, + 0.5647097826004028, + 0.5890825390815735, + 0.6134553551673889, + 0.6378281116485596, + 0.662200927734375, + 0.6865736842155457, + 0.7109465003013611, + 0.7353192567825317, + 0.7596920728683472, + 0.7840648889541626, + 0.8084376454353333, + 0.8328104019165039, + 0.8571832180023193, + 0.8815560340881348, + 0.9059287905693054, + 0.9303015470504761, + 0.9546743631362915 + ] + } + }, + "transformer.layers.6.4.ff.0.0.weight": { + "min": -0.32424914836883545, + "max": 0.3096342980861664, + "mean": -1.5644945960957557e-06, + "std": 0.04095214605331421, + "abs_mean": 0.03239164128899574, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.86203002929688, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 2, + 2, + 11, + 10, + 48, + 57, + 80, + 112, + 113, + 119, + 130, + 97, + 84, + 54, + 39, + 21, + 13, + 2, + 2 + ], + "bin_edges": [ + -0.13788051903247833, + -0.12475286424160004, + -0.11162521690130234, + -0.09849756211042404, + -0.08536991477012634, + -0.07224225997924805, + -0.05911460518836975, + -0.04598695784807205, + -0.032859303057193756, + -0.01973164826631546, + -0.006604000926017761, + 0.006523653864860535, + 0.01965130865573883, + 0.032778963446617126, + 0.04590660333633423, + 0.059034258127212524, + 0.07216191291809082, + 0.08528956770896912, + 0.09841722249984741, + 0.11154486238956451, + 0.12467251718044281 + ] + } + }, + "transformer.layers.6.4.ff.0.0.bias": { + "min": -0.12461961060762405, + "max": 0.02530832216143608, + "mean": -0.03069971315562725, + "std": 0.019789544865489006, + "abs_mean": 0.03134232759475708, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.337536334991455, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 2, + 1, + 7, + 13, + 15, + 17, + 34, + 62, + 87, + 123, + 129, + 141, + 120, + 102, + 60, + 54, + 18, + 11, + 3 + ], + "bin_edges": [ + -0.10850232094526291, + -0.10194070637226105, + -0.09537909924983978, + -0.08881748467683792, + -0.08225587010383606, + -0.0756942629814148, + -0.06913264840841293, + -0.06257103383541107, + -0.05600942671298981, + -0.049447815865278244, + -0.04288620501756668, + -0.03632459044456482, + -0.029762975871562958, + -0.023201368749141693, + -0.01663975417613983, + -0.010078147053718567, + -0.0035165324807167053, + 0.0030450820922851562, + 0.009606689214706421, + 0.016168303787708282, + 0.022729910910129547 + ] + } + }, + "transformer.layers.6.4.ff.2.weight": { + "min": -0.43944308161735535, + "max": 0.4446093440055847, + "mean": 9.534660784993321e-05, + "std": 0.035124197602272034, + "abs_mean": 0.027428196743130684, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 71.92691040039062, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 1, + 3, + 2, + 9, + 34, + 89, + 147, + 227, + 205, + 166, + 72, + 28, + 9, + 4, + 1, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.19963732361793518, + -0.1794430911540985, + -0.15924887359142303, + -0.13905464112758636, + -0.11886041611433029, + -0.09866619110107422, + -0.07847195863723755, + -0.05827774107456207, + -0.0380835086107254, + -0.017889276146888733, + 0.002304941415786743, + 0.022499173879623413, + 0.04269340634346008, + 0.06288763880729675, + 0.08308184146881104, + 0.1032760739326477, + 0.12347030639648438, + 0.14366453886032104, + 0.16385877132415771, + 0.184052973985672, + 0.20424720644950867 + ] + } + }, + "transformer.layers.6.4.ff.2.bias": { + "min": -0.22425536811351776, + "max": 0.051573775708675385, + "mean": -0.001182063017040491, + "std": 0.018455415964126587, + "abs_mean": 0.01334807462990284, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5914956331253052, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + 7, + 20, + 113, + 285, + 328, + 183, + 55, + 5 + ], + "bin_edges": [ + -0.22425536811351776, + -0.21046391129493713, + -0.1966724544763565, + -0.18288099765777588, + -0.16908954083919525, + -0.15529808402061462, + -0.141506627202034, + -0.12771517038345337, + -0.11392371356487274, + -0.10013225674629211, + -0.08634079992771149, + -0.07254934310913086, + -0.05875788629055023, + -0.044966429471969604, + -0.031174972653388977, + -0.01738351583480835, + -0.003592059016227722, + 0.010199397802352905, + 0.023990854620933533, + 0.037782326340675354, + 0.051573775708675385 + ] + } + }, + "transformer.layers.7.1.g": { + "min": 0.3393731713294983, + "max": 0.737841010093689, + "mean": 0.5586089491844177, + "std": 0.04119626432657242, + "abs_mean": 0.5586089491844177, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 17.923982620239258, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 2, + 3, + 4, + 6, + 15, + 51, + 76, + 130, + 174, + 217, + 202, + 88, + 15, + 8, + 2, + 2, + 1, + 1 + ], + "bin_edges": [ + 0.34397587180137634, + 0.36366912722587585, + 0.38336238265037537, + 0.4030556380748749, + 0.4227488934993744, + 0.4424421489238739, + 0.4621354341506958, + 0.4818286895751953, + 0.5015219449996948, + 0.5212152004241943, + 0.5409084558486938, + 0.5606017112731934, + 0.5802949666976929, + 0.5999882221221924, + 0.6196814775466919, + 0.6393747329711914, + 0.6590679883956909, + 0.6787612438201904, + 0.6984544992446899, + 0.7181477546691895, + 0.737841010093689 + ] + } + }, + "transformer.layers.7.2.to_q.weight": { + "min": -0.2723452150821686, + "max": 0.2782283425331116, + "mean": 1.9915583834517747e-05, + "std": 0.04106247052550316, + "abs_mean": 0.03229733556509018, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 42.047523498535156, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 1, + 4, + 7, + 20, + 34, + 65, + 128, + 151, + 175, + 156, + 113, + 77, + 40, + 15, + 7, + 6 + ], + "bin_edges": [ + -0.21072475612163544, + -0.1939927637577057, + -0.17726078629493713, + -0.16052879393100739, + -0.14379680156707764, + -0.12706482410430908, + -0.11033283174037933, + -0.09360084682703018, + -0.07686886191368103, + -0.06013686954975128, + -0.04340489208698273, + -0.02667289972305298, + -0.00994090735912323, + 0.006791070103645325, + 0.023523062467575073, + 0.04025505483150482, + 0.056987032294273376, + 0.07371900975704193, + 0.09045101702213287, + 0.10718299448490143, + 0.12391498684883118 + ] + } + }, + "transformer.layers.7.2.to_q.bias": { + "min": -0.13683027029037476, + "max": 0.1396752893924713, + "mean": 0.0004885591333732009, + "std": 0.026614630594849586, + "abs_mean": 0.018938830122351646, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8513957858085632, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 4, + 1, + 5, + 9, + 22, + 64, + 164, + 241, + 249, + 147, + 53, + 17, + 5, + 4, + 5, + 3, + 2, + 2 + ], + "bin_edges": [ + -0.13683027029037476, + -0.12300499528646469, + -0.10917971283197403, + -0.09535443782806396, + -0.0815291553735733, + -0.06770388036966324, + -0.053878605365753174, + -0.04005332291126251, + -0.026228047907352448, + -0.012402772903442383, + 0.0014225095510482788, + 0.01524779200553894, + 0.029073059558868408, + 0.04289834201335907, + 0.05672362446784973, + 0.0705488920211792, + 0.08437417447566986, + 0.09819945693016052, + 0.11202472448348999, + 0.12585002183914185, + 0.1396752893924713 + ] + } + }, + "transformer.layers.7.2.to_k.weight": { + "min": -0.49012690782546997, + "max": 0.35547417402267456, + "mean": 8.882825932232663e-05, + "std": 0.04070047289133072, + "abs_mean": 0.03201922029256821, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.67690658569336, + "elements": 1048576, + "histogram": { + "counts": [ + 5, + 3, + 9, + 17, + 24, + 43, + 58, + 65, + 76, + 118, + 133, + 132, + 104, + 70, + 50, + 46, + 19, + 17, + 9, + 2 + ], + "bin_edges": [ + -0.12249059975147247, + -0.11079084873199463, + -0.09909109771251678, + -0.08739134669303894, + -0.0756915956735611, + -0.06399184465408325, + -0.052292101085186005, + -0.04059235006570816, + -0.028892599046230316, + -0.017192848026752472, + -0.005493097007274628, + 0.00620664656162262, + 0.017906397581100464, + 0.029606148600578308, + 0.04130589962005615, + 0.053005650639534, + 0.06470540165901184, + 0.07640515267848969, + 0.08810490369796753, + 0.09980465471744537, + 0.11150440573692322 + ] + } + }, + "transformer.layers.7.2.to_k.bias": { + "min": -2.2938547134399414, + "max": 1.7426533699035645, + "mean": -0.021057037636637688, + "std": 0.49975258111953735, + "abs_mean": 0.35557305812835693, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 15.998469352722168, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 4, + 4, + 5, + 15, + 22, + 42, + 56, + 76, + 189, + 248, + 148, + 81, + 42, + 30, + 23, + 5, + 5, + 4 + ], + "bin_edges": [ + -2.2938547134399414, + -2.092029333114624, + -1.8902039527893066, + -1.6883784532546997, + -1.4865530729293823, + -1.284727692604065, + -1.082902193069458, + -0.8810768127441406, + -0.6792514324188232, + -0.47742605209350586, + -0.2756006717681885, + -0.0737752914428711, + 0.1280503273010254, + 0.3298757076263428, + 0.5317010879516602, + 0.7335264682769775, + 0.9353518486022949, + 1.1371772289276123, + 1.3390026092529297, + 1.540827989578247, + 1.7426533699035645 + ] + } + }, + "transformer.layers.7.2.to_v.weight": { + "min": -0.21735826134681702, + "max": 0.19773884117603302, + "mean": -4.063967935508117e-05, + "std": 0.03423747047781944, + "abs_mean": 0.02707846462726593, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.05868911743164, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 7, + 5, + 22, + 46, + 92, + 123, + 143, + 160, + 134, + 111, + 77, + 38, + 19, + 11, + 6, + 1, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.11161135137081146, + -0.09862302988767624, + -0.08563470840454102, + -0.07264638692140579, + -0.05965806543827057, + -0.046669743955135345, + -0.03368142247200012, + -0.0206931009888649, + -0.007704779505729675, + 0.005283541977405548, + 0.01827186346054077, + 0.0312601774930954, + 0.04424850642681122, + 0.05723683536052704, + 0.07022514939308167, + 0.08321346342563629, + 0.09620179235935211, + 0.10919012129306793, + 0.12217843532562256, + 0.13516674935817719, + 0.148155078291893 + ] + } + }, + "transformer.layers.7.2.to_v.bias": { + "min": -0.041265569627285004, + "max": 0.03861430287361145, + "mean": -0.00014519633259624243, + "std": 0.012876993976533413, + "abs_mean": 0.010816301219165325, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4118887782096863, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 4, + 7, + 26, + 56, + 76, + 91, + 93, + 82, + 97, + 84, + 107, + 91, + 76, + 58, + 30, + 13, + 3, + 2, + 3 + ], + "bin_edges": [ + -0.03600998595356941, + -0.032278772443532944, + -0.028547557070851326, + -0.024816343560814857, + -0.02108512818813324, + -0.01735391467809677, + -0.013622701168060303, + -0.009891485795378685, + -0.0061602722853422165, + -0.0024290569126605988, + 0.0013021565973758698, + 0.005033370107412338, + 0.008764583617448807, + 0.012495797127485275, + 0.016227014362812042, + 0.01995822787284851, + 0.02368944138288498, + 0.027420658618211746, + 0.031151872128248215, + 0.03488308563828468, + 0.03861430287361145 + ] + } + }, + "transformer.layers.7.2.to_out.0.weight": { + "min": -0.17728237807750702, + "max": 0.18350861966609955, + "mean": 4.7603076382074505e-05, + "std": 0.031560394912958145, + "abs_mean": 0.02491084672510624, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.31753921508789, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 1, + 5, + 12, + 20, + 30, + 41, + 79, + 117, + 120, + 119, + 132, + 114, + 76, + 50, + 36, + 23, + 14, + 5, + 3 + ], + "bin_edges": [ + -0.1022830680012703, + -0.09248834103345871, + -0.08269362151622772, + -0.07289889454841614, + -0.06310416758060455, + -0.05330944061279297, + -0.04351471737027168, + -0.0337199941277504, + -0.023925267159938812, + -0.014130540192127228, + -0.004335813224315643, + 0.005458906292915344, + 0.015253633260726929, + 0.025048352777957916, + 0.0348430797457695, + 0.044637806713581085, + 0.05443253368139267, + 0.06422726064920425, + 0.07402198761701584, + 0.08381671458482742, + 0.0936114490032196 + ] + } + }, + "transformer.layers.7.2.to_out.0.bias": { + "min": -0.1796274185180664, + "max": 0.18359197676181793, + "mean": -0.0022178757935762405, + "std": 0.05480958893895149, + "abs_mean": 0.04388166591525078, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.7544862031936646, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 4, + 17, + 29, + 58, + 72, + 100, + 108, + 147, + 114, + 100, + 106, + 56, + 38, + 27, + 12, + 5, + 0, + 2 + ], + "bin_edges": [ + -0.1796274185180664, + -0.16146644949913025, + -0.1433054804801941, + -0.12514451146125793, + -0.10698354244232178, + -0.08882257342338562, + -0.07066160440444946, + -0.052500635385513306, + -0.03433966636657715, + -0.01617869734764099, + 0.001982271671295166, + 0.020143240690231323, + 0.03830420970916748, + 0.05646517872810364, + 0.0746261477470398, + 0.09278711676597595, + 0.11094808578491211, + 0.12910905480384827, + 0.14727002382278442, + 0.16543099284172058, + 0.18359197676181793 + ] + } + }, + "transformer.layers.7.3.g": { + "min": 0.47430306673049927, + "max": 1.0235347747802734, + "mean": 0.645234227180481, + "std": 0.05006485432386398, + "abs_mean": 0.645234227180481, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.709495544433594, + "elements": 1024, + "histogram": { + "counts": [ + 6, + 4, + 18, + 50, + 133, + 230, + 285, + 171, + 60, + 18, + 12, + 5, + 3, + 0, + 1, + 0, + 0, + 1, + 0, + 3 + ], + "bin_edges": [ + 0.47430306673049927, + 0.5017646551132202, + 0.5292262434959412, + 0.5566878318786621, + 0.5841494202613831, + 0.611611008644104, + 0.6390725374221802, + 0.6665341854095459, + 0.6939957141876221, + 0.7214573621749878, + 0.748918890953064, + 0.7763805389404297, + 0.8038420677185059, + 0.8313036561012268, + 0.8587652444839478, + 0.8862268328666687, + 0.9136884212493896, + 0.9411500096321106, + 0.9686115980148315, + 0.9960731863975525, + 1.0235347747802734 + ] + } + }, + "transformer.layers.7.4.ff.0.0.weight": { + "min": -0.2717384696006775, + "max": 0.3092706799507141, + "mean": 0.0001124507180065848, + "std": 0.04068849980831146, + "abs_mean": 0.032301004976034164, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.32220458984375, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 3, + 3, + 10, + 20, + 47, + 71, + 103, + 133, + 159, + 136, + 122, + 87, + 55, + 34, + 13, + 1, + 2 + ], + "bin_edges": [ + -0.18017078936100006, + -0.16486860811710358, + -0.1495664268732071, + -0.13426423072814941, + -0.11896204948425293, + -0.10365986824035645, + -0.08835768699645996, + -0.07305549830198288, + -0.057753317058086395, + -0.042451128363609314, + -0.02714894711971283, + -0.011846765875816345, + 0.003455415368080139, + 0.018757596611976624, + 0.0340597927570343, + 0.049361974000930786, + 0.06466415524482727, + 0.07996635138988495, + 0.09526853263378143, + 0.11057071387767792, + 0.1258728951215744 + ] + } + }, + "transformer.layers.7.4.ff.0.0.bias": { + "min": -0.10565188527107239, + "max": 0.026852920651435852, + "mean": -0.029502389952540398, + "std": 0.017905903980135918, + "abs_mean": 0.030034106224775314, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.2086341381073, + "elements": 4096, + "histogram": { + "counts": [ + 2, + 3, + 3, + 5, + 8, + 11, + 38, + 60, + 87, + 93, + 100, + 122, + 107, + 89, + 88, + 79, + 53, + 35, + 11, + 6 + ], + "bin_edges": [ + -0.09144522249698639, + -0.08617058396339417, + -0.08089595288038254, + -0.07562131434679031, + -0.07034668326377869, + -0.06507204473018646, + -0.05979740619659424, + -0.05452277138829231, + -0.04924813657999039, + -0.04397350177168846, + -0.038698866963386536, + -0.03342422842979431, + -0.028149589896202087, + -0.02287495881319046, + -0.017600320279598236, + -0.012325689196586609, + -0.007051050662994385, + -0.0017764121294021606, + 0.0034982189536094666, + 0.00877285748720169, + 0.014047490432858467 + ] + } + }, + "transformer.layers.7.4.ff.2.weight": { + "min": -0.33881059288978577, + "max": 0.3287763297557831, + "mean": 5.716992018278688e-05, + "std": 0.03441813588142395, + "abs_mean": 0.02711346372961998, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 70.48104095458984, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 1, + 9, + 12, + 35, + 58, + 128, + 119, + 155, + 151, + 127, + 79, + 64, + 29, + 17, + 9, + 3, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.11595162004232407, + -0.10283190757036209, + -0.08971219509840012, + -0.07659248262643814, + -0.06347277015447617, + -0.05035305768251419, + -0.037233345210552216, + -0.02411363273859024, + -0.010993920266628265, + 0.0021257922053337097, + 0.015245504677295685, + 0.02836521714925766, + 0.041484929621219635, + 0.05460464209318161, + 0.06772435456514359, + 0.08084406703710556, + 0.09396377950906754, + 0.10708349198102951, + 0.12020320445299149, + 0.13332292437553406, + 0.14644262194633484 + ] + } + }, + "transformer.layers.7.4.ff.2.bias": { + "min": -0.1814029961824417, + "max": 0.04198184236884117, + "mean": -0.0010715797543525696, + "std": 0.017202889546751976, + "abs_mean": 0.012709951028227806, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5512910485267639, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 12, + 43, + 134, + 259, + 270, + 188, + 70, + 19 + ], + "bin_edges": [ + -0.1814029961824417, + -0.17023375630378723, + -0.15906451642513275, + -0.14789527654647827, + -0.1367260217666626, + -0.1255567967891693, + -0.11438754200935364, + -0.10321830213069916, + -0.09204906225204468, + -0.0808798223733902, + -0.06971058249473572, + -0.05854133516550064, + -0.047372087836265564, + -0.036202847957611084, + -0.025033608078956604, + -0.013864368200302124, + -0.002695128321647644, + 0.008474111557006836, + 0.019643351435661316, + 0.030812591314315796, + 0.04198184236884117 + ] + } + }, + "transformer.layers.8.1.g": { + "min": 0.32546839118003845, + "max": 0.6852879524230957, + "mean": 0.5111152529716492, + "std": 0.036710962653160095, + "abs_mean": 0.5111152529716492, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 16.397781372070312, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 4, + 2, + 3, + 9, + 28, + 67, + 112, + 187, + 208, + 192, + 99, + 62, + 16, + 6, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + 0.32546839118003845, + 0.3434593677520752, + 0.36145034432411194, + 0.3794413208961487, + 0.3974322974681854, + 0.41542327404022217, + 0.4334142804145813, + 0.45140522718429565, + 0.4693962335586548, + 0.48738718032836914, + 0.5053781867027283, + 0.5233691334724426, + 0.5413601398468018, + 0.5593510866165161, + 0.5773420929908752, + 0.5953330993652344, + 0.6133240461349487, + 0.6313149929046631, + 0.6493059992790222, + 0.6672970056533813, + 0.6852879524230957 + ] + } + }, + "transformer.layers.8.2.to_q.weight": { + "min": -0.23360855877399445, + "max": 0.22551532089710236, + "mean": -3.5930093872593716e-05, + "std": 0.039181701838970184, + "abs_mean": 0.031050506979227066, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.12158203125, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 15, + 30, + 25, + 51, + 70, + 80, + 125, + 135, + 101, + 115, + 90, + 71, + 33, + 23, + 12, + 13, + 3, + 1 + ], + "bin_edges": [ + -0.11901972442865372, + -0.10700470209121704, + -0.09498967230319977, + -0.08297464996576309, + -0.07095962762832642, + -0.05894460529088974, + -0.04692957550287247, + -0.03491455316543579, + -0.022899530827999115, + -0.010884508490562439, + 0.001130513846874237, + 0.01314554363489151, + 0.025160573422908783, + 0.03717558830976486, + 0.049190618097782135, + 0.061205632984638214, + 0.07322066277265549, + 0.08523569256067276, + 0.09725070744752884, + 0.10926573723554611, + 0.1212807446718216 + ] + } + }, + "transformer.layers.8.2.to_q.bias": { + "min": -0.11516069620847702, + "max": 0.13141536712646484, + "mean": 0.00015141721814870834, + "std": 0.02916705049574375, + "abs_mean": 0.02083246223628521, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9329023361206055, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 5, + 5, + 9, + 11, + 24, + 50, + 121, + 207, + 199, + 183, + 101, + 35, + 20, + 6, + 6, + 4, + 2, + 5, + 3 + ], + "bin_edges": [ + -0.11516069620847702, + -0.1028318926692009, + -0.09050308912992477, + -0.07817428559064865, + -0.06584548205137253, + -0.053516678512096405, + -0.04118787497282028, + -0.02885907143354416, + -0.016530267894268036, + -0.004201464354991913, + 0.00812733918428421, + 0.020456142723560333, + 0.032784946262836456, + 0.04511374980211258, + 0.0574425533413887, + 0.06977135688066483, + 0.08210016041994095, + 0.09442896395921707, + 0.1067577674984932, + 0.11908657103776932, + 0.13141536712646484 + ] + } + }, + "transformer.layers.8.2.to_k.weight": { + "min": -0.3523465394973755, + "max": 0.2849816083908081, + "mean": 7.249596819747239e-06, + "std": 0.039250195026397705, + "abs_mean": 0.031097358092665672, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.191776275634766, + "elements": 1048576, + "histogram": { + "counts": [ + 8, + 14, + 21, + 31, + 63, + 85, + 122, + 125, + 149, + 116, + 88, + 68, + 57, + 28, + 13, + 5, + 5, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.10888364911079407, + -0.09555511176586151, + -0.08222658187150955, + -0.068898044526577, + -0.05556951090693474, + -0.04224097728729248, + -0.028912439942359924, + -0.015583910048007965, + -0.002255372703075409, + 0.011073164641857147, + 0.024401694536209106, + 0.03773023188114166, + 0.05105876922607422, + 0.06438730657100677, + 0.07771582901477814, + 0.0910443663597107, + 0.10437290370464325, + 0.1177014410495758, + 0.13102997839450836, + 0.14435851573944092, + 0.15768705308437347 + ] + } + }, + "transformer.layers.8.2.to_k.bias": { + "min": -4.126643180847168, + "max": 3.538667678833008, + "mean": -0.011556778103113174, + "std": 0.681910514831543, + "abs_mean": 0.39681142568588257, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.813613891601562, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 1, + 1, + 8, + 3, + 9, + 25, + 31, + 97, + 455, + 247, + 54, + 30, + 12, + 8, + 8, + 3, + 1, + 2 + ], + "bin_edges": [ + -4.126643180847168, + -3.743377685546875, + -3.360112190246582, + -2.97684645652771, + -2.593580961227417, + -2.210315465927124, + -1.827049732208252, + -1.443784236907959, + -1.060518741607666, + -0.677253246307373, + -0.2939877510070801, + 0.08927774429321289, + 0.47254371643066406, + 0.855809211730957, + 1.23907470703125, + 1.622340202331543, + 2.005605697631836, + 2.388871192932129, + 2.772136688232422, + 3.155402183532715, + 3.538667678833008 + ] + } + }, + "transformer.layers.8.2.to_v.weight": { + "min": -0.2112656831741333, + "max": 0.20894697308540344, + "mean": 3.47470777342096e-05, + "std": 0.03448949381709099, + "abs_mean": 0.027210766449570656, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.31674575805664, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 8, + 11, + 10, + 25, + 52, + 79, + 113, + 131, + 149, + 116, + 107, + 68, + 55, + 31, + 18, + 16, + 3, + 4 + ], + "bin_edges": [ + -0.11793059855699539, + -0.10670945793390274, + -0.09548831731081009, + -0.08426717668771744, + -0.07304603606462479, + -0.061824895441532135, + -0.050603754818439484, + -0.03938261419534683, + -0.02816147357225418, + -0.01694033294916153, + -0.005719192326068878, + 0.005501948297023773, + 0.016723088920116425, + 0.027944229543209076, + 0.03916537016630173, + 0.05038651078939438, + 0.06160765141248703, + 0.07282879203557968, + 0.08404993265867233, + 0.09527107328176498, + 0.10649221390485764 + ] + } + }, + "transformer.layers.8.2.to_v.bias": { + "min": -0.03565378487110138, + "max": 0.0480014868080616, + "mean": 0.0007942374795675278, + "std": 0.012850471772253513, + "abs_mean": 0.010666023939847946, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4117993414402008, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 8, + 20, + 33, + 69, + 78, + 104, + 101, + 113, + 106, + 112, + 106, + 68, + 50, + 20, + 5, + 4, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.03565378487110138, + -0.031471021473407745, + -0.02728825807571411, + -0.023105494678020477, + -0.018922731280326843, + -0.01473996788263321, + -0.010557204484939575, + -0.006374441087245941, + -0.002191677689552307, + 0.001991085708141327, + 0.006173849105834961, + 0.010356612503528595, + 0.014539375901222229, + 0.018722139298915863, + 0.022904902696609497, + 0.02708766609430313, + 0.031270429491996765, + 0.0354531928896904, + 0.03963595628738403, + 0.04381871968507767, + 0.0480014868080616 + ] + } + }, + "transformer.layers.8.2.to_out.0.weight": { + "min": -0.21031072735786438, + "max": 0.19297289848327637, + "mean": -1.2874927506345557e-06, + "std": 0.03169998526573181, + "abs_mean": 0.02499360963702202, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.46042251586914, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 5, + 7, + 25, + 48, + 66, + 109, + 116, + 140, + 118, + 128, + 87, + 63, + 34, + 22, + 16, + 5, + 4, + 3, + 1 + ], + "bin_edges": [ + -0.09405967593193054, + -0.08362150937318802, + -0.07318335026502609, + -0.06274518370628357, + -0.052307020872831345, + -0.04186885803937912, + -0.0314306914806366, + -0.02099253237247467, + -0.010554365813732147, + -0.00011619925498962402, + 0.010321959853172302, + 0.020760126411914825, + 0.03119829297065735, + 0.041636452078819275, + 0.0520746111869812, + 0.06251278519630432, + 0.07295094430446625, + 0.08338910341262817, + 0.0938272774219513, + 0.10426543653011322, + 0.11470360308885574 + ] + } + }, + "transformer.layers.8.2.to_out.0.bias": { + "min": -0.18637274205684662, + "max": 0.17692941427230835, + "mean": -0.0028488910757005215, + "std": 0.05860321223735809, + "abs_mean": 0.04695521295070648, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.8766025304794312, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 0, + 8, + 13, + 27, + 45, + 71, + 90, + 106, + 130, + 123, + 102, + 91, + 74, + 49, + 26, + 21, + 13, + 3, + 3 + ], + "bin_edges": [ + -0.18637274205684662, + -0.16820763051509857, + -0.15004253387451172, + -0.13187742233276367, + -0.11371231079101562, + -0.09554719924926758, + -0.07738209515810013, + -0.05921699106693268, + -0.04105187952518463, + -0.022886767983436584, + -0.004721656441688538, + 0.013443440198898315, + 0.03160855174064636, + 0.04977366328239441, + 0.06793875992298126, + 0.08610387146472931, + 0.10426898300647736, + 0.1224340945482254, + 0.14059920608997345, + 0.1587643176317215, + 0.17692941427230835 + ] + } + }, + "transformer.layers.8.3.g": { + "min": 0.47467249631881714, + "max": 1.0397725105285645, + "mean": 0.6513394117355347, + "std": 0.049329087138175964, + "abs_mean": 0.6513394117355347, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.90249252319336, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 5, + 8, + 29, + 165, + 235, + 313, + 144, + 46, + 22, + 11, + 7, + 5, + 5, + 1, + 0, + 0, + 0, + 0, + 2 + ], + "bin_edges": [ + 0.47467249631881714, + 0.5029274821281433, + 0.5311825275421143, + 0.5594375133514404, + 0.5876924991607666, + 0.6159474849700928, + 0.644202470779419, + 0.6724575161933899, + 0.7007125020027161, + 0.7289674878120422, + 0.7572225332260132, + 0.7854775190353394, + 0.8137325048446655, + 0.8419874906539917, + 0.8702424764633179, + 0.8984975218772888, + 0.926752507686615, + 0.9550074934959412, + 0.9832624793052673, + 1.0115175247192383, + 1.0397725105285645 + ] + } + }, + "transformer.layers.8.4.ff.0.0.weight": { + "min": -0.248422771692276, + "max": 0.32902756333351135, + "mean": 0.00018066739721689373, + "std": 0.04057690501213074, + "abs_mean": 0.03225279226899147, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.09464263916016, + "elements": 4194304, + "histogram": { + "counts": [ + 3, + 3, + 5, + 19, + 21, + 33, + 67, + 83, + 87, + 106, + 120, + 134, + 104, + 77, + 59, + 35, + 23, + 16, + 2, + 3 + ], + "bin_edges": [ + -0.132222980260849, + -0.11964092403650284, + -0.10705886781215668, + -0.09447681903839111, + -0.08189476281404495, + -0.06931270658969879, + -0.05673065781593323, + -0.04414860159158707, + -0.031566545367240906, + -0.018984489142894745, + -0.006402432918548584, + 0.00617961585521698, + 0.018761664628982544, + 0.0313437283039093, + 0.043925777077674866, + 0.056507840752601624, + 0.06908988952636719, + 0.08167193830013275, + 0.09425400197505951, + 0.10683605074882507, + 0.11941809952259064 + ] + } + }, + "transformer.layers.8.4.ff.0.0.bias": { + "min": -0.12427264451980591, + "max": 0.024594629183411598, + "mean": -0.030488643795251846, + "std": 0.017578164115548134, + "abs_mean": 0.031011048704385757, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.2522852420806885, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 2, + 3, + 7, + 16, + 26, + 69, + 79, + 110, + 123, + 111, + 126, + 104, + 95, + 55, + 44, + 14, + 8, + 4, + 3 + ], + "bin_edges": [ + -0.093555748462677, + -0.08764822781085968, + -0.08174070715904236, + -0.07583319395780563, + -0.06992567330598831, + -0.06401815265417099, + -0.05811063572764397, + -0.05220311880111694, + -0.04629559814929962, + -0.0403880774974823, + -0.034480560570955276, + -0.028573043644428253, + -0.02266552299261093, + -0.01675800234079361, + -0.010850489139556885, + -0.004942968487739563, + 0.0009645521640777588, + 0.006872072815895081, + 0.012779593467712402, + 0.018687106668949127, + 0.024594629183411598 + ] + } + }, + "transformer.layers.8.4.ff.2.weight": { + "min": -0.4205840826034546, + "max": 0.4813268184661865, + "mean": 2.129650965798646e-06, + "std": 0.035403117537498474, + "abs_mean": 0.027970315888524055, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 72.49813079833984, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 8, + 18, + 34, + 62, + 76, + 122, + 128, + 138, + 117, + 113, + 72, + 46, + 36, + 9, + 10, + 3, + 3, + 0, + 1 + ], + "bin_edges": [ + -0.10240344703197479, + -0.09038490056991577, + -0.07836635410785675, + -0.06634780019521713, + -0.05432925373315811, + -0.04231070727109909, + -0.030292153358459473, + -0.01827360689640045, + -0.006255060434341431, + 0.00576348602771759, + 0.01778203248977661, + 0.029800578951835632, + 0.04181914031505585, + 0.05383768677711487, + 0.06585623323917389, + 0.07787477970123291, + 0.08989332616329193, + 0.10191187262535095, + 0.11393041908740997, + 0.125948965549469, + 0.1379675269126892 + ] + } + }, + "transformer.layers.8.4.ff.2.bias": { + "min": -0.15161579847335815, + "max": 0.043303120881319046, + "mean": 3.9640130125917494e-05, + "std": 0.014866231009364128, + "abs_mean": 0.011253134347498417, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.47548872232437134, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 11, + 34, + 83, + 214, + 277, + 225, + 103, + 42, + 10 + ], + "bin_edges": [ + -0.15161579847335815, + -0.14186985790729523, + -0.1321239024400711, + -0.12237796187400818, + -0.11263201385736465, + -0.10288606584072113, + -0.0931401252746582, + -0.08339417725801468, + -0.07364822924137115, + -0.06390228122472763, + -0.054156333208084106, + -0.04441039264202118, + -0.034664444625377655, + -0.02491849660873413, + -0.015172556042671204, + -0.0054266005754470825, + 0.004319339990615845, + 0.014065280556678772, + 0.023811236023902893, + 0.03355717658996582, + 0.043303120881319046 + ] + } + }, + "transformer.layers.9.1.g": { + "min": 0.3155551552772522, + "max": 0.6806549429893494, + "mean": 0.5528165102005005, + "std": 0.04051704332232475, + "abs_mean": 0.5528165102005005, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 17.737529754638672, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 1, + 0, + 1, + 2, + 6, + 7, + 8, + 19, + 32, + 61, + 115, + 178, + 227, + 186, + 114, + 30, + 6, + 3, + 1 + ], + "bin_edges": [ + 0.3155551552772522, + 0.33381015062332153, + 0.35206514596939087, + 0.3703201413154602, + 0.38857510685920715, + 0.4068301022052765, + 0.4250850975513458, + 0.4433400630950928, + 0.4615950584411621, + 0.47985005378723145, + 0.4981050491333008, + 0.5163600444793701, + 0.5346150398254395, + 0.5528700351715088, + 0.5711250305175781, + 0.5893800258636475, + 0.6076350212097168, + 0.6258900165557861, + 0.6441450119018555, + 0.66239994764328, + 0.6806549429893494 + ] + } + }, + "transformer.layers.9.2.to_q.weight": { + "min": -0.2062118798494339, + "max": 0.21964126825332642, + "mean": 3.0860355764161795e-05, + "std": 0.038303423672914505, + "abs_mean": 0.030420470982789993, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.222164154052734, + "elements": 1048576, + "histogram": { + "counts": [ + 5, + 2, + 11, + 16, + 24, + 47, + 70, + 100, + 136, + 138, + 143, + 101, + 77, + 56, + 34, + 31, + 5, + 2, + 0, + 2 + ], + "bin_edges": [ + -0.12023625522851944, + -0.10769837349653244, + -0.09516048431396484, + -0.08262260258197784, + -0.07008472084999084, + -0.057546839118003845, + -0.04500894993543625, + -0.03247106820344925, + -0.01993318647146225, + -0.00739530473947525, + 0.005142576992511749, + 0.01768045872449875, + 0.030218355357646942, + 0.04275623708963394, + 0.05529411882162094, + 0.06783200055360794, + 0.08036988228559494, + 0.09290776401758194, + 0.10544564574956894, + 0.11798352748155594, + 0.13052140176296234 + ] + } + }, + "transformer.layers.9.2.to_q.bias": { + "min": -0.1376407891511917, + "max": 0.11259414255619049, + "mean": 2.069001493509859e-05, + "std": 0.02579990215599537, + "abs_mean": 0.01859201118350029, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8251938819885254, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 2, + 3, + 2, + 6, + 7, + 29, + 72, + 151, + 221, + 233, + 145, + 77, + 20, + 12, + 6, + 5, + 3, + 4 + ], + "bin_edges": [ + -0.1376407891511917, + -0.12512904405593872, + -0.11261729896068573, + -0.10010554641485214, + -0.08759380131959915, + -0.07508205622434616, + -0.06257030367851257, + -0.05005855858325958, + -0.03754681348800659, + -0.0250350683927536, + -0.01252332329750061, + -1.1578202247619629e-05, + 0.012500181794166565, + 0.025011926889419556, + 0.037523671984672546, + 0.05003541707992554, + 0.06254716217517853, + 0.07505890727043152, + 0.08757065236568451, + 0.1000823974609375, + 0.11259414255619049 + ] + } + }, + "transformer.layers.9.2.to_k.weight": { + "min": -0.40213435888290405, + "max": 0.3705216944217682, + "mean": 2.6252395400661044e-05, + "std": 0.03818526491522789, + "abs_mean": 0.030293822288513184, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.10115051269531, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 4, + 14, + 24, + 53, + 66, + 91, + 122, + 125, + 142, + 119, + 87, + 58, + 35, + 27, + 17, + 11, + 4 + ], + "bin_edges": [ + -0.13659250736236572, + -0.12421666085720062, + -0.11184081435203552, + -0.09946496039628983, + -0.08708911389112473, + -0.07471326738595963, + -0.06233741343021393, + -0.04996156692504883, + -0.03758572041988373, + -0.025209873914718628, + -0.012834027409553528, + -0.00045818090438842773, + 0.011917680501937866, + 0.024293527007102966, + 0.036669373512268066, + 0.049045220017433167, + 0.06142106652259827, + 0.07379691302776337, + 0.08617275953292847, + 0.09854860603809357, + 0.11092444509267807 + ] + } + }, + "transformer.layers.9.2.to_k.bias": { + "min": -3.765413761138916, + "max": 2.86456298828125, + "mean": 0.0011342763900756836, + "std": 0.5163310766220093, + "abs_mean": 0.3170565068721771, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 16.514562606811523, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 2, + 6, + 3, + 6, + 23, + 64, + 221, + 446, + 145, + 39, + 21, + 9, + 3, + 2, + 6, + 2 + ], + "bin_edges": [ + -3.765413761138916, + -3.43391489982605, + -3.1024160385131836, + -2.7709174156188965, + -2.439418315887451, + -2.107919692993164, + -1.7764208316802979, + -1.4449219703674316, + -1.1134231090545654, + -0.7819242477416992, + -0.450425386428833, + -0.1189265251159668, + 0.2125720977783203, + 0.5440711975097656, + 0.8755698204040527, + 1.207068920135498, + 1.5385675430297852, + 1.8700661659240723, + 2.2015652656555176, + 2.5330638885498047, + 2.86456298828125 + ] + } + }, + "transformer.layers.9.2.to_v.weight": { + "min": -0.20278441905975342, + "max": 0.1972842514514923, + "mean": 2.9531782274716534e-05, + "std": 0.034300558269023895, + "abs_mean": 0.027055270969867706, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.123321533203125, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 1, + 4, + 9, + 21, + 35, + 57, + 60, + 83, + 141, + 126, + 153, + 115, + 89, + 43, + 31, + 17, + 10, + 1, + 2 + ], + "bin_edges": [ + -0.12183795869350433, + -0.11040417850017548, + -0.09897040575742722, + -0.08753662556409836, + -0.0761028528213501, + -0.06466907262802124, + -0.05323529243469238, + -0.04180151969194412, + -0.030367739498615265, + -0.018933959305286407, + -0.007500186562538147, + 0.0039335936307907104, + 0.015367373824119568, + 0.026801154017448425, + 0.03823491930961609, + 0.049668699502944946, + 0.061102479696273804, + 0.07253625988960266, + 0.08397004008293152, + 0.09540380537509918, + 0.10683758556842804 + ] + } + }, + "transformer.layers.9.2.to_v.bias": { + "min": -0.05089922249317169, + "max": 0.03997639939188957, + "mean": -0.00041936602792702615, + "std": 0.013420597650110722, + "abs_mean": 0.01112040039151907, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4294591248035431, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 4, + 7, + 21, + 46, + 88, + 98, + 116, + 98, + 118, + 117, + 107, + 75, + 65, + 22, + 12, + 2, + 3 + ], + "bin_edges": [ + -0.05089922249317169, + -0.046355441212654114, + -0.041811659932136536, + -0.03726787865161896, + -0.03272409737110138, + -0.0281803160905838, + -0.023636534810066223, + -0.019092753529548645, + -0.014548972249031067, + -0.010005190968513489, + -0.005461409687995911, + -0.0009176284074783325, + 0.0036261528730392456, + 0.008169934153556824, + 0.012713715434074402, + 0.01725749671459198, + 0.021801277995109558, + 0.026345059275627136, + 0.030888840556144714, + 0.03543262183666229, + 0.03997639939188957 + ] + } + }, + "transformer.layers.9.2.to_out.0.weight": { + "min": -0.19621425867080688, + "max": 0.20147208869457245, + "mean": -1.2328569937380962e-05, + "std": 0.0318082757294178, + "abs_mean": 0.02506079152226448, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 32.571311950683594, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 2, + 0, + 1, + 3, + 11, + 16, + 54, + 100, + 160, + 153, + 179, + 142, + 79, + 62, + 22, + 11, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.1516515016555786, + -0.1376262605190277, + -0.123601034283638, + -0.1095757931470871, + -0.09555055946111679, + -0.08152532577514648, + -0.06750008463859558, + -0.053474850952625275, + -0.03944961726665497, + -0.025424376130104065, + -0.011399149894714355, + 0.002626091241836548, + 0.01665133237838745, + 0.03067655861377716, + 0.044701799750328064, + 0.05872702598571777, + 0.07275226712226868, + 0.08677750825881958, + 0.10080274939537048, + 0.114827960729599, + 0.1288532018661499 + ] + } + }, + "transformer.layers.9.2.to_out.0.bias": { + "min": -0.19283677637577057, + "max": 0.1948237270116806, + "mean": -0.002969849156215787, + "std": 0.06253352016210556, + "abs_mean": 0.050027262419462204, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.002351760864258, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 4, + 8, + 16, + 37, + 49, + 81, + 89, + 116, + 124, + 111, + 112, + 82, + 67, + 41, + 21, + 28, + 5, + 1, + 3 + ], + "bin_edges": [ + -0.19283677637577057, + -0.17345374822616577, + -0.15407073497772217, + -0.13468770682811737, + -0.11530467867851257, + -0.09592165052890778, + -0.07653862982988358, + -0.057155609130859375, + -0.03777258098125458, + -0.01838955283164978, + 0.000993475317955017, + 0.02037648856639862, + 0.03975951671600342, + 0.059142544865608215, + 0.07852555811405182, + 0.09790860116481781, + 0.11729161441326141, + 0.13667462766170502, + 0.156057670712471, + 0.1754406839609146, + 0.1948237270116806 + ] + } + }, + "transformer.layers.9.3.g": { + "min": 0.34950727224349976, + "max": 1.081899642944336, + "mean": 0.6671000123023987, + "std": 0.05490493029356003, + "abs_mean": 0.6671000123023987, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.419307708740234, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 0, + 2, + 9, + 54, + 230, + 383, + 209, + 60, + 21, + 13, + 8, + 3, + 2, + 0, + 0, + 1, + 2 + ], + "bin_edges": [ + 0.34950727224349976, + 0.38612687587738037, + 0.4227465093135834, + 0.4593661427497864, + 0.495985746383667, + 0.5326053500175476, + 0.5692249536514282, + 0.6058446168899536, + 0.6424642205238342, + 0.6790838241577148, + 0.7157034873962402, + 0.7523230910301208, + 0.7889426946640015, + 0.8255622982978821, + 0.8621819615364075, + 0.8988015651702881, + 0.9354211688041687, + 0.9720407724380493, + 1.0086603164672852, + 1.0452799797058105, + 1.081899642944336 + ] + } + }, + "transformer.layers.9.4.ff.0.0.weight": { + "min": -0.22493921220302582, + "max": 0.2511034309864044, + "mean": 0.0003591308486647904, + "std": 0.04076593369245529, + "abs_mean": 0.03243311122059822, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.48428344726562, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 3, + 7, + 12, + 22, + 55, + 71, + 91, + 127, + 144, + 126, + 112, + 96, + 46, + 38, + 26, + 13, + 5, + 1, + 1 + ], + "bin_edges": [ + -0.1372128278017044, + -0.1232616975903511, + -0.1093105673789978, + -0.0953594297170639, + -0.0814082995057106, + -0.0674571692943573, + -0.0535060316324234, + -0.0395549014210701, + -0.025603771209716797, + -0.011652633547782898, + 0.002298489212989807, + 0.016249626874923706, + 0.030200764536857605, + 0.04415188729763031, + 0.05810302495956421, + 0.07205414772033691, + 0.08600528538227081, + 0.09995642304420471, + 0.11390756070613861, + 0.12785868346691132, + 0.14180982112884521 + ] + } + }, + "transformer.layers.9.4.ff.0.0.bias": { + "min": -0.09088904410600662, + "max": 0.04371574521064758, + "mean": -0.030075963586568832, + "std": 0.01758558303117752, + "abs_mean": 0.030652616173028946, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.229682445526123, + "elements": 4096, + "histogram": { + "counts": [ + 2, + 7, + 8, + 11, + 25, + 52, + 87, + 108, + 124, + 135, + 127, + 114, + 87, + 52, + 36, + 16, + 5, + 1, + 1, + 2 + ], + "bin_edges": [ + -0.09088904410600662, + -0.08454560488462448, + -0.07820217311382294, + -0.0718587338924408, + -0.06551529467105865, + -0.05917186290025711, + -0.05282842367887497, + -0.04648498818278313, + -0.040141552686691284, + -0.03379811719059944, + -0.0274546816945076, + -0.021111242473125458, + -0.014767803251743317, + -0.008424371480941772, + -0.0020809322595596313, + 0.004262499511241913, + 0.010605938732624054, + 0.016949377954006195, + 0.02329280972480774, + 0.02963624894618988, + 0.03597967326641083 + ] + } + }, + "transformer.layers.9.4.ff.2.weight": { + "min": -0.35314324498176575, + "max": 0.303651362657547, + "mean": -4.348178117652424e-05, + "std": 0.03712818771600723, + "abs_mean": 0.029329190030694008, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 76.03089904785156, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 4, + 0, + 7, + 13, + 36, + 53, + 71, + 88, + 107, + 154, + 126, + 119, + 99, + 65, + 31, + 16, + 6, + 3, + 1 + ], + "bin_edges": [ + -0.13770392537117004, + -0.12496820092201233, + -0.11223247647285461, + -0.0994967445731163, + -0.08676102012395859, + -0.07402529567480087, + -0.06128956377506256, + -0.048553839325904846, + -0.03581811487674713, + -0.023082390427589417, + -0.010346665978431702, + 0.002389058470726013, + 0.015124797821044922, + 0.027860522270202637, + 0.04059624671936035, + 0.053331971168518066, + 0.06606769561767578, + 0.0788034200668335, + 0.09153914451599121, + 0.10427486896514893, + 0.11701059341430664 + ] + } + }, + "transformer.layers.9.4.ff.2.bias": { + "min": -0.16180230677127838, + "max": 0.0634349063038826, + "mean": -8.249300299212337e-05, + "std": 0.019394585862755775, + "abs_mean": 0.01485530100762844, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.6203292012214661, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 2, + 2, + 16, + 47, + 127, + 215, + 238, + 190, + 103, + 44, + 10, + 3 + ], + "bin_edges": [ + -0.16180230677127838, + -0.15054044127464294, + -0.1392785906791687, + -0.12801672518253326, + -0.11675485968589783, + -0.10549300163984299, + -0.09423114359378815, + -0.08296927809715271, + -0.07170742005109787, + -0.06044556200504303, + -0.04918369650840759, + -0.03792183846235275, + -0.026659980416297913, + -0.015398114919662476, + -0.004136249423027039, + 0.007125601172447205, + 0.01838746666908264, + 0.02964933216571808, + 0.04091118276119232, + 0.05217304825782776, + 0.0634349063038826 + ] + } + }, + "transformer.layers.10.1.g": { + "min": 0.34883353114128113, + "max": 0.7206243872642517, + "mean": 0.5422865748405457, + "std": 0.03884800896048546, + "abs_mean": 0.5422865748405457, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 17.39759635925293, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 1, + 7, + 9, + 15, + 16, + 52, + 106, + 161, + 227, + 178, + 164, + 39, + 13, + 4, + 2, + 2, + 0, + 1 + ], + "bin_edges": [ + 0.34883353114128113, + 0.36742308735847473, + 0.38601261377334595, + 0.40460216999053955, + 0.42319169640541077, + 0.44178125262260437, + 0.4603707790374756, + 0.4789603352546692, + 0.4975498914718628, + 0.5161393880844116, + 0.5347289443016052, + 0.5533185005187988, + 0.5719080567359924, + 0.590497612953186, + 0.6090871095657349, + 0.6276766657829285, + 0.6462662220001221, + 0.6648557782173157, + 0.6834453344345093, + 0.7020348310470581, + 0.7206243872642517 + ] + } + }, + "transformer.layers.10.2.to_q.weight": { + "min": -0.21920670568943024, + "max": 0.22291362285614014, + "mean": -1.1165878277097363e-05, + "std": 0.039236169308423996, + "abs_mean": 0.03101443126797676, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.177345275878906, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 7, + 9, + 18, + 34, + 65, + 69, + 124, + 104, + 134, + 113, + 95, + 74, + 64, + 40, + 18, + 13, + 8, + 7, + 3 + ], + "bin_edges": [ + -0.11750198900699615, + -0.10525693744421005, + -0.09301188588142395, + -0.08076683431863785, + -0.06852178275585175, + -0.05627673119306564, + -0.04403167963027954, + -0.03178662806749344, + -0.019541576504707336, + -0.007296524941921234, + 0.004948526620864868, + 0.017193570733070374, + 0.029438629746437073, + 0.04168368875980377, + 0.05392873287200928, + 0.06617377698421478, + 0.07841883599758148, + 0.09066389501094818, + 0.10290893912315369, + 0.11515398323535919, + 0.1273990422487259 + ] + } + }, + "transformer.layers.10.2.to_q.bias": { + "min": -0.11826413869857788, + "max": 0.17058128118515015, + "mean": 0.0002835137420333922, + "std": 0.02510087564587593, + "abs_mean": 0.017988789826631546, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8028870820999146, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 1, + 2, + 9, + 14, + 44, + 131, + 233, + 278, + 180, + 62, + 23, + 10, + 1, + 5, + 1, + 1, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.11826413869857788, + -0.10382186621427536, + -0.08937959372997284, + -0.07493732869625092, + -0.060495056211948395, + -0.046052783727645874, + -0.03161051869392395, + -0.01716824620962143, + -0.0027259737253189087, + 0.011716291308403015, + 0.026158571243286133, + 0.04060083627700806, + 0.05504310131072998, + 0.0694853812456131, + 0.08392764627933502, + 0.09836992621421814, + 0.11281219124794006, + 0.127254456281662, + 0.1416967213153839, + 0.15613901615142822, + 0.17058128118515015 + ] + } + }, + "transformer.layers.10.2.to_k.weight": { + "min": -0.2464587390422821, + "max": 0.3006129264831543, + "mean": -3.662023664219305e-05, + "std": 0.03893572464585304, + "abs_mean": 0.030795468017458916, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.86971664428711, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 1, + 2, + 13, + 22, + 27, + 48, + 84, + 117, + 142, + 154, + 127, + 100, + 77, + 41, + 24, + 10, + 5, + 4 + ], + "bin_edges": [ + -0.166384756565094, + -0.15166360139846802, + -0.13694246113300323, + -0.12222130596637726, + -0.10750015825033188, + -0.0927790105342865, + -0.07805785536766052, + -0.06333670765161514, + -0.04861555993556976, + -0.03389440476894379, + -0.019173264503479004, + -0.004452109336853027, + 0.01026904582977295, + 0.024990186095237732, + 0.03971134126186371, + 0.05443248152732849, + 0.06915363669395447, + 0.08387479186058044, + 0.09859594702720642, + 0.11331707239151001, + 0.1280382126569748 + ] + } + }, + "transformer.layers.10.2.to_k.bias": { + "min": -3.4999661445617676, + "max": 3.709076166152954, + "mean": 0.015840880572795868, + "std": 0.7814859747886658, + "abs_mean": 0.422378271818161, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 25.00047492980957, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 3, + 10, + 12, + 5, + 7, + 14, + 25, + 122, + 453, + 230, + 43, + 24, + 10, + 7, + 7, + 6, + 7, + 6, + 4 + ], + "bin_edges": [ + -3.4999661445617676, + -3.1395139694213867, + -2.779061794281006, + -2.418609857559204, + -2.0581576824188232, + -1.6977055072784424, + -1.3372535705566406, + -0.9768013954162598, + -0.6163492202758789, + -0.25589704513549805, + 0.10455513000488281, + 0.46500706672668457, + 0.8254590034484863, + 1.1859111785888672, + 1.546363353729248, + 1.906815528869629, + 2.2672677040100098, + 2.6277198791503906, + 2.9881720542907715, + 3.3486242294311523, + 3.709076166152954 + ] + } + }, + "transformer.layers.10.2.to_v.weight": { + "min": -0.2185182124376297, + "max": 0.23746132850646973, + "mean": -1.3619632227346301e-05, + "std": 0.03630794584751129, + "abs_mean": 0.028688525781035423, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 37.17878723144531, + "elements": 1048576, + "histogram": { + "counts": [ + 6, + 7, + 15, + 26, + 37, + 65, + 85, + 112, + 136, + 134, + 136, + 99, + 60, + 44, + 22, + 6, + 6, + 1, + 2, + 1 + ], + "bin_edges": [ + -0.1074351966381073, + -0.09535156935453415, + -0.083267942070961, + -0.07118430733680725, + -0.0591006837785244, + -0.04701705649495125, + -0.0349334254860878, + -0.02284979820251465, + -0.010766170918941498, + 0.0013174563646316528, + 0.013401083648204803, + 0.025484710931777954, + 0.0375683456659317, + 0.049651965498924255, + 0.061735600233078, + 0.07381922006607056, + 0.0859028548002243, + 0.09798648953437805, + 0.1100701093673706, + 0.12215374410152435, + 0.1342373639345169 + ] + } + }, + "transformer.layers.10.2.to_v.bias": { + "min": -0.04712348431348801, + "max": 0.05133059248328209, + "mean": 0.00048102246364578605, + "std": 0.01351132895797491, + "abs_mean": 0.011189396493136883, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.43242543935775757, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 1, + 8, + 17, + 48, + 73, + 112, + 100, + 145, + 88, + 122, + 125, + 88, + 43, + 16, + 8, + 1, + 0, + 3 + ], + "bin_edges": [ + -0.04712348431348801, + -0.042412735521793365, + -0.037701983004808426, + -0.032991234213113785, + -0.028280483558773994, + -0.023569732904434204, + -0.018858984112739563, + -0.014148231595754623, + -0.009437482804059982, + -0.004726734012365341, + -1.598149538040161e-05, + 0.0046947672963142395, + 0.00940551608800888, + 0.01411626860499382, + 0.01882702112197876, + 0.0235377699136734, + 0.028248518705368042, + 0.03295926749706268, + 0.037670016288757324, + 0.04238077253103256, + 0.0470915250480175 + ] + } + }, + "transformer.layers.10.2.to_out.0.weight": { + "min": -0.21373434364795685, + "max": 0.2173190861940384, + "mean": 5.650868115480989e-05, + "std": 0.033619917929172516, + "abs_mean": 0.026529820635914803, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 34.42643737792969, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 0, + 4, + 11, + 29, + 51, + 71, + 109, + 127, + 145, + 154, + 123, + 85, + 35, + 33, + 7, + 7, + 6, + 0, + 1 + ], + "bin_edges": [ + -0.12208394706249237, + -0.10931171476840973, + -0.09653948247432709, + -0.08376725018024445, + -0.0709950178861618, + -0.05822278559207916, + -0.04545055329799652, + -0.03267832100391388, + -0.019906088709831238, + -0.007133856415748596, + 0.005638375878334045, + 0.018410608172416687, + 0.03118284046649933, + 0.04395507276058197, + 0.05672730505466461, + 0.06949953734874725, + 0.0822717696428299, + 0.09504400193691254, + 0.10781623423099518, + 0.12058846652507782, + 0.13336071372032166 + ] + } + }, + "transformer.layers.10.2.to_out.0.bias": { + "min": -0.21108141541481018, + "max": 0.23115544021129608, + "mean": -0.005106039810925722, + "std": 0.06184696406126022, + "abs_mean": 0.04949750006198883, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.984872817993164, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 6, + 14, + 38, + 66, + 84, + 101, + 142, + 156, + 111, + 116, + 79, + 38, + 22, + 12, + 9, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.21108141541481018, + -0.18896956741809845, + -0.1668577343225479, + -0.14474588632583618, + -0.12263404577970505, + -0.10052220523357391, + -0.07841035723686218, + -0.056298524141311646, + -0.034186676144599915, + -0.012074828147888184, + 0.010037004947662354, + 0.032148852944374084, + 0.054260700941085815, + 0.07637253403663635, + 0.09848436713218689, + 0.12059623003005981, + 0.14270806312561035, + 0.1648198962211609, + 0.1869317591190338, + 0.20904359221458435, + 0.23115544021129608 + ] + } + }, + "transformer.layers.10.3.g": { + "min": 0.36205485463142395, + "max": 1.099104642868042, + "mean": 0.6992122530937195, + "std": 0.05326760187745094, + "abs_mean": 0.6992122530937195, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.439565658569336, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 2, + 0, + 3, + 26, + 114, + 352, + 309, + 129, + 32, + 16, + 7, + 0, + 3, + 2, + 2, + 1, + 1 + ], + "bin_edges": [ + 0.36205485463142395, + 0.398907333612442, + 0.43575984239578247, + 0.47261232137680054, + 0.5094648003578186, + 0.5463173389434814, + 0.5831698179244995, + 0.6200222969055176, + 0.6568747758865356, + 0.6937272548675537, + 0.7305797338485718, + 0.7674322128295898, + 0.8042846918106079, + 0.8411372303962708, + 0.8779897689819336, + 0.9148422479629517, + 0.9516947269439697, + 0.9885472059249878, + 1.0253996849060059, + 1.062252163887024, + 1.099104642868042 + ] + } + }, + "transformer.layers.10.4.ff.0.0.weight": { + "min": -0.23436696827411652, + "max": 0.24465103447437286, + "mean": 0.00046349139302037656, + "std": 0.04127480834722519, + "abs_mean": 0.03279660642147064, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 84.52825164794922, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 1, + 5, + 16, + 20, + 33, + 69, + 63, + 104, + 125, + 136, + 118, + 96, + 79, + 57, + 36, + 26, + 8, + 3, + 3 + ], + "bin_edges": [ + -0.13340413570404053, + -0.12061960995197296, + -0.1078350841999054, + -0.09505055844783783, + -0.08226603269577026, + -0.0694815069437027, + -0.05669698119163513, + -0.043912455439567566, + -0.0311279296875, + -0.018343403935432434, + -0.005558878183364868, + 0.007225647568702698, + 0.020010173320770264, + 0.03279469907283783, + 0.045579224824905396, + 0.05836375057697296, + 0.07114827632904053, + 0.0839328020811081, + 0.09671732783317566, + 0.10950185358524323, + 0.12228637933731079 + ] + } + }, + "transformer.layers.10.4.ff.0.0.bias": { + "min": -0.09793505817651749, + "max": 0.0681939497590065, + "mean": -0.03142588585615158, + "std": 0.0180974081158638, + "abs_mean": 0.03204537183046341, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.320847511291504, + "elements": 4096, + "histogram": { + "counts": [ + 3, + 6, + 9, + 27, + 71, + 93, + 155, + 162, + 161, + 153, + 92, + 39, + 21, + 4, + 0, + 2, + 0, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.09469368308782578, + -0.0865493044257164, + -0.07840491831302643, + -0.07026053965091705, + -0.06211615726351738, + -0.053971774876117706, + -0.04582739621400833, + -0.03768301382660866, + -0.029538631439208984, + -0.02139425277709961, + -0.013249866664409637, + -0.0051054880023002625, + 0.0030388906598091125, + 0.011183276772499084, + 0.01932765543460846, + 0.02747204154729843, + 0.035616420209407806, + 0.04376079887151718, + 0.051905177533626556, + 0.060049571096897125, + 0.0681939497590065 + ] + } + }, + "transformer.layers.10.4.ff.2.weight": { + "min": -0.3012528717517853, + "max": 0.3511028289794922, + "mean": -8.16234532976523e-05, + "std": 0.04028059542179108, + "abs_mean": 0.0316866971552372, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 82.4867172241211, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 1, + 10, + 18, + 38, + 51, + 71, + 114, + 117, + 159, + 124, + 117, + 80, + 53, + 23, + 11, + 8, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.13198897242546082, + -0.11771106719970703, + -0.10343316197395325, + -0.08915524929761887, + -0.07487734407186508, + -0.0605994388461113, + -0.046321526169776917, + -0.03204362094402313, + -0.017765715718269348, + -0.003487810492515564, + 0.01079009473323822, + 0.025067999958992004, + 0.03934592008590698, + 0.05362382531166077, + 0.06790173053741455, + 0.08217963576316833, + 0.09645754098892212, + 0.1107354462146759, + 0.1250133514404297, + 0.13929125666618347, + 0.15356916189193726 + ] + } + }, + "transformer.layers.10.4.ff.2.bias": { + "min": -0.15210135281085968, + "max": 0.14944450557231903, + "mean": 0.00025588623248040676, + "std": 0.023021480068564415, + "abs_mean": 0.017337616533041, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.7363730669021606, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 1, + 0, + 5, + 13, + 55, + 130, + 260, + 280, + 168, + 68, + 13, + 3, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.15210135281085968, + -0.13702405989170074, + -0.12194676697254181, + -0.10686947405338287, + -0.09179218113422394, + -0.076714888215065, + -0.06163759529590607, + -0.04656030237674713, + -0.031483009457588196, + -0.01640571653842926, + -0.0013284236192703247, + 0.01374886929988861, + 0.028826162219047546, + 0.04390345513820648, + 0.05898074805736542, + 0.07405804097652435, + 0.08913533389568329, + 0.10421262681484222, + 0.11928991973400116, + 0.1343672126531601, + 0.14944450557231903 + ] + } + }, + "transformer.layers.11.1.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "abs_mean": 1.0, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + 0.5, + 0.550000011920929, + 0.6000000238418579, + 0.6499999761581421, + 0.699999988079071, + 0.75, + 0.800000011920929, + 0.8500000238418579, + 0.8999999761581421, + 0.9500000476837158, + 1.0, + 1.0499999523162842, + 1.100000023841858, + 1.1500000953674316, + 1.2000000476837158, + 1.25, + 1.2999999523162842, + 1.350000023841858, + 1.4000000953674316, + 1.4500000476837158, + 1.5 + ] + } + }, + "transformer.layers.11.2.to_q.weight": { + "min": -0.031249936670064926, + "max": 0.031249839812517166, + "mean": -1.9292721844976768e-05, + "std": 0.01804409734904766, + "abs_mean": 0.01562901958823204, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 18.476978302001953, + "elements": 1048576, + "histogram": { + "counts": [ + 51, + 50, + 42, + 43, + 50, + 63, + 49, + 61, + 45, + 43, + 45, + 51, + 50, + 60, + 38, + 47, + 53, + 58, + 50, + 51 + ], + "bin_edges": [ + -0.031167268753051758, + -0.02804918773472309, + -0.024931106716394424, + -0.021813027560710907, + -0.01869494467973709, + -0.015576864592730999, + -0.012458784505724907, + -0.00934070348739624, + -0.0062226224690675735, + -0.003104541450738907, + 1.3539567589759827e-05, + 0.0031316205859184265, + 0.006249699741601944, + 0.00936778262257576, + 0.012485861778259277, + 0.015603944659233093, + 0.01872202381491661, + 0.021840102970600128, + 0.024958185851573944, + 0.02807626500725746, + 0.031194347888231277 + ] + } + }, + "transformer.layers.11.2.to_q.bias": { + "min": -0.031226642429828644, + "max": 0.03100142627954483, + "mean": -0.0010842883493751287, + "std": 0.01795371063053608, + "abs_mean": 0.015566134825348854, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5752854347229004, + "elements": 1024, + "histogram": { + "counts": [ + 54, + 50, + 71, + 46, + 41, + 56, + 52, + 54, + 38, + 53, + 48, + 59, + 49, + 49, + 47, + 47, + 53, + 47, + 43, + 43 + ], + "bin_edges": [ + -0.031226642429828644, + -0.02811523899435997, + -0.025003835558891296, + -0.021892432123422623, + -0.01878102868795395, + -0.015669625252485275, + -0.012558221817016602, + -0.009446818381547928, + -0.006335414946079254, + -0.0032240115106105804, + -0.00011260807514190674, + 0.002998795360326767, + 0.006110198795795441, + 0.009221602231264114, + 0.012333005666732788, + 0.015444409102201462, + 0.018555812537670135, + 0.02166721597313881, + 0.024778619408607483, + 0.027890022844076157, + 0.03100142627954483 + ] + } + }, + "transformer.layers.11.2.to_k.weight": { + "min": -0.031249966472387314, + "max": 0.031249895691871643, + "mean": 3.5441100862954045e-06, + "std": 0.018044503405690193, + "abs_mean": 0.015626542270183563, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 18.477415084838867, + "elements": 1048576, + "histogram": { + "counts": [ + 47, + 54, + 39, + 49, + 49, + 44, + 41, + 44, + 43, + 55, + 57, + 63, + 52, + 52, + 39, + 66, + 51, + 43, + 61, + 51 + ], + "bin_edges": [ + -0.031233858317136765, + -0.028110237792134285, + -0.024986617267131805, + -0.021862998604774475, + -0.018739378079771996, + -0.015615757554769516, + -0.012492138892412186, + -0.009368518367409706, + -0.0062448978424072266, + -0.003121277317404747, + 2.343207597732544e-06, + 0.003125961869955063, + 0.006249580532312393, + 0.009373202919960022, + 0.012496821582317352, + 0.015620443969964981, + 0.01874406263232231, + 0.02186768129467964, + 0.02499130368232727, + 0.0281149223446846, + 0.03123854473233223 + ] + } + }, + "transformer.layers.11.2.to_k.bias": { + "min": -0.031156372278928757, + "max": 0.031184475868940353, + "mean": 0.0003338930255267769, + "std": 0.018065759912133217, + "abs_mean": 0.01575113646686077, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.57792067527771, + "elements": 1024, + "histogram": { + "counts": [ + 48, + 56, + 52, + 40, + 48, + 42, + 63, + 44, + 55, + 45, + 40, + 49, + 56, + 49, + 55, + 52, + 48, + 60, + 55, + 43 + ], + "bin_edges": [ + -0.031156372278928757, + -0.02803933061659336, + -0.024922287091612816, + -0.02180524542927742, + -0.018688201904296875, + -0.01557116024196148, + -0.012454118579626083, + -0.009337075054645538, + -0.0062200333923101425, + -0.0031029917299747467, + 1.405179500579834e-05, + 0.0031310953199863434, + 0.00624813511967659, + 0.009365178644657135, + 0.01248222216963768, + 0.015599261969327927, + 0.01871630549430847, + 0.021833349019289017, + 0.024950388818979263, + 0.02806743234395981, + 0.031184475868940353 + ] + } + }, + "transformer.layers.11.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ], + "norm": 0.0, + "elements": 1048576, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.11.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.11.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ], + "norm": 0.0, + "elements": 1048576, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.11.2.to_out.0.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.11.3.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "abs_mean": 1.0, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + 0.5, + 0.550000011920929, + 0.6000000238418579, + 0.6499999761581421, + 0.699999988079071, + 0.75, + 0.800000011920929, + 0.8500000238418579, + 0.8999999761581421, + 0.9500000476837158, + 1.0, + 1.0499999523162842, + 1.100000023841858, + 1.1500000953674316, + 1.2000000476837158, + 1.25, + 1.2999999523162842, + 1.350000023841858, + 1.4000000953674316, + 1.4500000476837158, + 1.5 + ] + } + }, + "transformer.layers.11.4.ff.0.0.weight": { + "min": -0.031249985098838806, + "max": 0.031249992549419403, + "mean": -8.39352924231207e-06, + "std": 0.018043218180537224, + "abs_mean": 0.015625, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 36.949981689453125, + "elements": 4194304, + "histogram": { + "counts": [ + 61, + 48, + 38, + 52, + 58, + 61, + 40, + 44, + 59, + 56, + 56, + 40, + 43, + 41, + 48, + 53, + 49, + 47, + 44, + 62 + ], + "bin_edges": [ + -0.03120383620262146, + -0.02808719128370285, + -0.02497054636478424, + -0.021853899583220482, + -0.018737254664301872, + -0.015620609745383263, + -0.012503962963819504, + -0.009387318044900894, + -0.0062706731259822845, + -0.003154028207063675, + -3.738328814506531e-05, + 0.0030792616307735443, + 0.006195910274982452, + 0.009312555193901062, + 0.012429200112819672, + 0.015545845031738281, + 0.01866248995065689, + 0.0217791348695755, + 0.02489577978849411, + 0.02801242470741272, + 0.031129073351621628 + ] + } + }, + "transformer.layers.11.4.ff.0.0.bias": { + "min": -0.03124961629509926, + "max": 0.031239181756973267, + "mean": 0.00015365774743258953, + "std": 0.017994258552789688, + "abs_mean": 0.015541428700089455, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 1.151534080505371, + "elements": 4096, + "histogram": { + "counts": [ + 41, + 51, + 42, + 64, + 49, + 41, + 50, + 56, + 39, + 57, + 55, + 44, + 48, + 51, + 54, + 54, + 54, + 50, + 55, + 45 + ], + "bin_edges": [ + -0.03124961629509926, + -0.02812858298420906, + -0.025007549673318863, + -0.021886516362428665, + -0.018765483051538467, + -0.01564444974064827, + -0.012523418292403221, + -0.009402384981513023, + -0.006281351670622826, + -0.003160318359732628, + -3.9285048842430115e-05, + 0.0030817463994026184, + 0.006202779710292816, + 0.009323813021183014, + 0.012444846332073212, + 0.01556587964296341, + 0.018686912953853607, + 0.021807946264743805, + 0.024928979575634003, + 0.0280500128865242, + 0.031171046197414398 + ] + } + }, + "transformer.layers.11.4.ff.2.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 4096 + ], + "norm": 0.0, + "elements": 4194304, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.11.4.ff.2.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.12.1.g": { + "min": 0.3829966187477112, + "max": 0.718121349811554, + "mean": 0.5806018114089966, + "std": 0.03862323611974716, + "abs_mean": 0.5806018114089966, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 18.620281219482422, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 3, + 3, + 1, + 0, + 8, + 14, + 19, + 44, + 100, + 116, + 212, + 192, + 153, + 71, + 38, + 16, + 3, + 3, + 2 + ], + "bin_edges": [ + 0.3829966187477112, + 0.3997528553009033, + 0.41650909185409546, + 0.4332653284072876, + 0.45002156496047974, + 0.4667778015136719, + 0.483534038066864, + 0.5002902746200562, + 0.5170465111732483, + 0.5338027477264404, + 0.5505589842796326, + 0.5673152208328247, + 0.5840714573860168, + 0.600827693939209, + 0.6175839304924011, + 0.6343401670455933, + 0.6510964035987854, + 0.6678526401519775, + 0.6846088767051697, + 0.7013651132583618, + 0.718121349811554 + ] + } + }, + "transformer.layers.12.2.to_q.weight": { + "min": -0.23782959580421448, + "max": 0.1963561624288559, + "mean": 2.6626767066773027e-05, + "std": 0.03746971860527992, + "abs_mean": 0.02968023158609867, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.36855697631836, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 4, + 1, + 11, + 17, + 27, + 54, + 93, + 123, + 132, + 127, + 122, + 108, + 74, + 40, + 28, + 20, + 11, + 4, + 2 + ], + "bin_edges": [ + -0.12963607907295227, + -0.11716864258050919, + -0.1047012135386467, + -0.09223377704620361, + -0.07976634800434113, + -0.06729891151189804, + -0.054831475019454956, + -0.04236404597759247, + -0.029896609485149384, + -0.0174291729927063, + -0.004961743950843811, + 0.007505685091018677, + 0.01997312903404236, + 0.032440558075904846, + 0.044907987117767334, + 0.057375431060791016, + 0.0698428601026535, + 0.08231028914451599, + 0.09477773308753967, + 0.10724516212940216, + 0.11971258372068405 + ] + } + }, + "transformer.layers.12.2.to_q.bias": { + "min": -0.11848776042461395, + "max": 0.1658152937889099, + "mean": 0.0009899433935061097, + "std": 0.027532605454325676, + "abs_mean": 0.019537298008799553, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8811826705932617, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 3, + 4, + 9, + 13, + 35, + 110, + 225, + 267, + 178, + 94, + 24, + 13, + 8, + 8, + 4, + 0, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.11848776042461395, + -0.10427260398864746, + -0.09005745500326157, + -0.07584229856729507, + -0.06162714585661888, + -0.04741199314594269, + -0.033196836709976196, + -0.0189816877245903, + -0.00476653128862381, + 0.009448617696762085, + 0.023663774132728577, + 0.03787893056869507, + 0.05209408700466156, + 0.06630924344062805, + 0.08052438497543335, + 0.09473954141139984, + 0.10895469784736633, + 0.12316985428333282, + 0.13738499581813812, + 0.1516001671552658, + 0.1658152937889099 + ] + } + }, + "transformer.layers.12.2.to_k.weight": { + "min": -0.2458610236644745, + "max": 0.5000857710838318, + "mean": -5.0437982281437144e-05, + "std": 0.037627607583999634, + "abs_mean": 0.029811149463057518, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.53023910522461, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 3, + 8, + 23, + 39, + 65, + 119, + 149, + 173, + 152, + 119, + 71, + 36, + 25, + 9, + 5, + 1, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.13122329115867615, + -0.1159626767039299, + -0.10070206224918365, + -0.08544144034385681, + -0.07018082588911057, + -0.05492021143436432, + -0.039659589529037476, + -0.02439897507429123, + -0.009138360619544983, + 0.00612226128578186, + 0.02138286828994751, + 0.03664349019527435, + 0.051904112100601196, + 0.06716471910476685, + 0.08242534101009369, + 0.09768594801425934, + 0.11294656991958618, + 0.12820717692375183, + 0.14346781373023987, + 0.15872842073440552, + 0.17398902773857117 + ] + } + }, + "transformer.layers.12.2.to_k.bias": { + "min": -3.936108350753784, + "max": 3.7635273933410645, + "mean": -0.003571532666683197, + "std": 0.6807447671890259, + "abs_mean": 0.44434577226638794, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.77349281311035, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 2, + 2, + 6, + 6, + 13, + 37, + 89, + 257, + 337, + 162, + 39, + 28, + 5, + 4, + 3, + 4, + 1, + 2 + ], + "bin_edges": [ + -3.936108350753784, + -3.551126480102539, + -3.166144847869873, + -2.781162977218628, + -2.396181106567383, + -2.011199474334717, + -1.6262176036834717, + -1.2412359714508057, + -0.8562541007995605, + -0.47127223014831543, + -0.08629059791564941, + 0.2986910343170166, + 0.6836731433868408, + 1.0686547756195068, + 1.4536364078521729, + 1.838618516921997, + 2.223600149154663, + 2.608581781387329, + 2.9935638904571533, + 3.3785455226898193, + 3.7635273933410645 + ] + } + }, + "transformer.layers.12.2.to_v.weight": { + "min": -0.2272127866744995, + "max": 0.25125452876091003, + "mean": -1.1669091691146605e-05, + "std": 0.03743912652134895, + "abs_mean": 0.029495541006326675, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.337135314941406, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 0, + 1, + 4, + 9, + 21, + 38, + 90, + 131, + 162, + 167, + 137, + 92, + 78, + 42, + 17, + 5, + 2, + 0, + 2 + ], + "bin_edges": [ + -0.15501034259796143, + -0.1397821605205536, + -0.12455396354198456, + -0.10932578146457672, + -0.09409759193658829, + -0.07886940240859985, + -0.06364122033119202, + -0.04841303080320358, + -0.03318484127521515, + -0.017956659197807312, + -0.0027284622192382812, + 0.012499719858169556, + 0.027727901935577393, + 0.04295609891414642, + 0.05818428099155426, + 0.07341247797012329, + 0.08864066004753113, + 0.10386884212493896, + 0.1190970242023468, + 0.13432523608207703, + 0.14955341815948486 + ] + } + }, + "transformer.layers.12.2.to_v.bias": { + "min": -0.07160257548093796, + "max": 0.08056868612766266, + "mean": -0.0005193912656977773, + "std": 0.015654100105166435, + "abs_mean": 0.012508584186434746, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5009623169898987, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 4, + 12, + 21, + 85, + 138, + 182, + 176, + 155, + 136, + 61, + 20, + 4, + 2, + 0, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.07160257548093796, + -0.06399401277303696, + -0.056385450065135956, + -0.048776887357234955, + -0.041168324649333954, + -0.03355976194143295, + -0.025951199233531952, + -0.01834263652563095, + -0.01073407381772995, + -0.003125511109828949, + 0.004483051598072052, + 0.012091614305973053, + 0.019700177013874054, + 0.027308739721775055, + 0.034917302429676056, + 0.04252586513757706, + 0.05013442784547806, + 0.05774299055337906, + 0.06535155326128006, + 0.07296011596918106, + 0.08056868612766266 + ] + } + }, + "transformer.layers.12.2.to_out.0.weight": { + "min": -0.22808189690113068, + "max": 0.25764524936676025, + "mean": -2.8624439437408e-05, + "std": 0.03542578965425491, + "abs_mean": 0.02771892212331295, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 36.27553176879883, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 1, + 6, + 20, + 27, + 61, + 87, + 108, + 135, + 148, + 131, + 96, + 84, + 44, + 26, + 17, + 4, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.11833298951387405, + -0.10563816130161285, + -0.09294333308935165, + -0.08024850487709045, + -0.06755367666482925, + -0.054858848452568054, + -0.042164020240306854, + -0.029469192028045654, + -0.016774363815784454, + -0.004079535603523254, + 0.008615292608737946, + 0.02131011337041855, + 0.034004949033260345, + 0.04669978469610214, + 0.059394605457782745, + 0.07208942621946335, + 0.08478426188230515, + 0.09747909754514694, + 0.11017391830682755, + 0.12286873906850815, + 0.13556356728076935 + ] + } + }, + "transformer.layers.12.2.to_out.0.bias": { + "min": -0.2000962197780609, + "max": 0.21490387618541718, + "mean": -0.0055319443345069885, + "std": 0.0682973712682724, + "abs_mean": 0.05413249880075455, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.1916093826293945, + "elements": 1024, + "histogram": { + "counts": [ + 7, + 9, + 10, + 35, + 36, + 53, + 72, + 108, + 115, + 134, + 106, + 105, + 80, + 45, + 43, + 19, + 12, + 5, + 4, + 2 + ], + "bin_edges": [ + -0.2000962197780609, + -0.1793462187051773, + -0.1585962176322937, + -0.1378462016582489, + -0.1170962005853653, + -0.09634619951248169, + -0.07559619098901749, + -0.054846182465553284, + -0.03409618139266968, + -0.013346180319786072, + 0.007403820753097534, + 0.028153836727142334, + 0.04890383780002594, + 0.06965383887290955, + 0.09040385484695435, + 0.11115384101867676, + 0.13190385699272156, + 0.15265387296676636, + 0.17340385913848877, + 0.19415387511253357, + 0.21490387618541718 + ] + } + }, + "transformer.layers.12.3.g": { + "min": 0.4052681028842926, + "max": 1.1870543956756592, + "mean": 0.7378469705581665, + "std": 0.05485502630472183, + "abs_mean": 0.7378469705581665, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 23.676198959350586, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 1, + 7, + 72, + 268, + 371, + 160, + 72, + 22, + 12, + 5, + 4, + 1, + 0, + 0, + 0, + 1, + 2 + ], + "bin_edges": [ + 0.46438509225845337, + 0.5005185604095459, + 0.5366520285606384, + 0.572785496711731, + 0.6089189648628235, + 0.645052433013916, + 0.6811858415603638, + 0.7173193693161011, + 0.7534527778625488, + 0.7895863056182861, + 0.8257197141647339, + 0.8618532419204712, + 0.897986650466919, + 0.9341201186180115, + 0.970253586769104, + 1.0063869953155518, + 1.042520523071289, + 1.0786540508270264, + 1.1147874593734741, + 1.1509208679199219, + 1.1870543956756592 + ] + } + }, + "transformer.layers.12.4.ff.0.0.weight": { + "min": -0.22090063989162445, + "max": 0.24591459333896637, + "mean": 0.0005211709067225456, + "std": 0.041342560201883316, + "abs_mean": 0.032862767577171326, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 84.66887664794922, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 5, + 11, + 18, + 33, + 44, + 83, + 105, + 127, + 129, + 141, + 87, + 93, + 47, + 35, + 15, + 14, + 5, + 2, + 2 + ], + "bin_edges": [ + -0.13292989134788513, + -0.1190410628914833, + -0.10515223443508148, + -0.09126341342926025, + -0.07737458497285843, + -0.0634857565164566, + -0.049596935510635376, + -0.03570810705423355, + -0.021819278597831726, + -0.007930450141429901, + 0.005958378314971924, + 0.019847199320793152, + 0.03373602032661438, + 0.0476248562335968, + 0.06151367723941803, + 0.07540251314640045, + 0.08929133415222168, + 0.10318015515804291, + 0.11706899106502533, + 0.13095781207084656, + 0.14484664797782898 + ] + } + }, + "transformer.layers.12.4.ff.0.0.bias": { + "min": -0.10329551994800568, + "max": 0.02418467588722706, + "mean": -0.03265417367219925, + "std": 0.0188569538295269, + "abs_mean": 0.03297993913292885, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.413226842880249, + "elements": 4096, + "histogram": { + "counts": [ + 5, + 8, + 8, + 18, + 30, + 39, + 57, + 78, + 97, + 121, + 136, + 115, + 106, + 75, + 53, + 30, + 16, + 6, + 1, + 1 + ], + "bin_edges": [ + -0.0908934697508812, + -0.08513956516981125, + -0.0793856531381607, + -0.07363174855709076, + -0.06787784397602081, + -0.06212393194437027, + -0.056370027363300323, + -0.05061611905694008, + -0.044862210750579834, + -0.03910830244421959, + -0.033354394137859344, + -0.027600489556789398, + -0.021846584975719452, + -0.01609267294406891, + -0.010338768362998962, + -0.004584856331348419, + 0.001169048249721527, + 0.006922952830791473, + 0.012676864862442017, + 0.018430769443511963, + 0.02418467588722706 + ] + } + }, + "transformer.layers.12.4.ff.2.weight": { + "min": -0.44879788160324097, + "max": 0.421781986951828, + "mean": -0.00043243536492809653, + "std": 0.046903904527425766, + "abs_mean": 0.03645244985818863, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 96.05355072021484, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 1, + 4, + 3, + 18, + 27, + 59, + 105, + 138, + 184, + 165, + 131, + 94, + 33, + 25, + 5, + 2, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.19662070274353027, + -0.17642301321029663, + -0.1562253087759018, + -0.13602760434150696, + -0.11582991480827332, + -0.09563221782445908, + -0.07543452084064484, + -0.05523681640625, + -0.03503912687301636, + -0.014841437339782715, + 0.005356267094612122, + 0.025553971529006958, + 0.0457516610622406, + 0.06594935059547424, + 0.08614706993103027, + 0.10634475946426392, + 0.12654244899749756, + 0.1467401385307312, + 0.16693782806396484, + 0.18713554739952087, + 0.20733323693275452 + ] + } + }, + "transformer.layers.12.4.ff.2.bias": { + "min": -0.25108596682548523, + "max": 0.46939900517463684, + "mean": 0.003194585908204317, + "std": 0.04450792446732521, + "abs_mean": 0.03129497915506363, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.4272236824035645, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 5, + 25, + 103, + 371, + 303, + 149, + 34, + 3, + 0, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.25108596682548523, + -0.2150617241859436, + -0.17903746664524078, + -0.14301320910453796, + -0.10698896646499634, + -0.07096472382545471, + -0.03494046628475189, + 0.0010837912559509277, + 0.037108033895492554, + 0.07313227653503418, + 0.1091565191745758, + 0.14518079161643982, + 0.18120503425598145, + 0.21722927689552307, + 0.2532535493373871, + 0.2892777621746063, + 0.32530203461647034, + 0.36132630705833435, + 0.3973505198955536, + 0.4333747923374176, + 0.46939900517463684 + ] + } + }, + "transformer.layers.13.0.weight": { + "min": -0.3169757127761841, + "max": 0.33316904306411743, + "mean": -2.5288825781899504e-05, + "std": 0.021290883421897888, + "abs_mean": 0.016878249123692513, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 30.831497192382812, + "elements": 2097152, + "histogram": { + "counts": [ + 2, + 2, + 4, + 14, + 20, + 34, + 63, + 82, + 110, + 136, + 130, + 112, + 104, + 75, + 48, + 26, + 28, + 6, + 2, + 2 + ], + "bin_edges": [ + -0.07049302756786346, + -0.06357433646917343, + -0.0566556490957737, + -0.049736957997083664, + -0.04281827062368393, + -0.035899579524993896, + -0.028980888426303864, + -0.02206220105290413, + -0.015143509954214096, + -0.008224818855524063, + -0.0013061314821243286, + 0.005612559616565704, + 0.012531250715255737, + 0.01944994181394577, + 0.026368625462055206, + 0.03328731656074524, + 0.04020600765943527, + 0.047124698758125305, + 0.05404338985681534, + 0.06096208095550537, + 0.0678807720541954 + ] + } + }, + "transformer.layers.13.1.g": { + "min": 0.3246179223060608, + "max": 0.6840593218803406, + "mean": 0.5709414482116699, + "std": 0.04453985393047333, + "abs_mean": 0.5709414482116699, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 18.325580596923828, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 0, + 1, + 4, + 5, + 8, + 14, + 11, + 25, + 48, + 74, + 123, + 163, + 219, + 184, + 69, + 30, + 15, + 4 + ], + "bin_edges": [ + 0.3246179223060608, + 0.34259000420570374, + 0.3605620563030243, + 0.37853413820266724, + 0.3965061902999878, + 0.41447827219963074, + 0.4324503540992737, + 0.45042240619659424, + 0.4683944880962372, + 0.4863665699958801, + 0.5043386220932007, + 0.522310733795166, + 0.5402827858924866, + 0.5582548379898071, + 0.5762269496917725, + 0.5941989421844482, + 0.6121710538864136, + 0.6301431655883789, + 0.6481151580810547, + 0.66608726978302, + 0.6840593218803406 + ] + } + }, + "transformer.layers.13.2.to_q.weight": { + "min": -0.16449199616909027, + "max": 0.17385058104991913, + "mean": -4.8540678108111024e-05, + "std": 0.033184703439474106, + "abs_mean": 0.026343977078795433, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.98078155517578, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 3, + 6, + 19, + 37, + 64, + 88, + 137, + 134, + 154, + 128, + 96, + 55, + 28, + 22, + 14, + 5, + 5, + 1, + 1 + ], + "bin_edges": [ + -0.10950843244791031, + -0.09739306569099426, + -0.08527769148349762, + -0.07316232472658157, + -0.06104695796966553, + -0.04893159121274948, + -0.03681621700525284, + -0.024700850248336792, + -0.012585483491420746, + -0.0004701167345046997, + 0.011645250022411346, + 0.02376062422990799, + 0.03587599843740463, + 0.04799135774374008, + 0.060106731951236725, + 0.07222209125757217, + 0.08433746546506882, + 0.09645283967256546, + 0.10856819897890091, + 0.12068357318639755, + 0.1327989399433136 + ] + } + }, + "transformer.layers.13.2.to_q.bias": { + "min": -0.18657186627388, + "max": 0.14269262552261353, + "mean": 3.6818586522713304e-05, + "std": 0.029670175164937973, + "abs_mean": 0.02127697318792343, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.9489827156066895, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 5, + 2, + 7, + 14, + 39, + 123, + 216, + 284, + 179, + 78, + 23, + 15, + 7, + 1, + 2, + 4 + ], + "bin_edges": [ + -0.18657186627388, + -0.1701086461544037, + -0.15364542603492737, + -0.13718219101428986, + -0.12071897089481354, + -0.10425575077533722, + -0.0877925232052803, + -0.07132929563522339, + -0.05486607551574707, + -0.03840285539627075, + -0.021939635276794434, + -0.005476400256156921, + 0.010986819863319397, + 0.027450039982795715, + 0.04391327500343323, + 0.060376495122909546, + 0.07683971524238586, + 0.09330293536186218, + 0.1097661554813385, + 0.12622937560081482, + 0.14269262552261353 + ] + } + }, + "transformer.layers.13.2.to_k.weight": { + "min": -0.3801823556423187, + "max": 0.24568894505500793, + "mean": -1.0017960448749363e-05, + "std": 0.0327659472823143, + "abs_mean": 0.026017045602202415, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.55192565917969, + "elements": 1048576, + "histogram": { + "counts": [ + 7, + 17, + 28, + 45, + 73, + 96, + 108, + 116, + 105, + 105, + 104, + 65, + 58, + 28, + 22, + 13, + 7, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.0795261487364769, + -0.06986591219902039, + -0.06020567566156387, + -0.05054543912410736, + -0.04088520258665085, + -0.031224966049194336, + -0.021564729511737823, + -0.011904492974281311, + -0.0022442564368247986, + 0.007415980100631714, + 0.017076216638088226, + 0.02673645317554474, + 0.03639668971300125, + 0.04605693370103836, + 0.055717162787914276, + 0.06537739187479019, + 0.0750376358628273, + 0.08469787985086441, + 0.09435810893774033, + 0.10401833802461624, + 0.11367857456207275 + ] + } + }, + "transformer.layers.13.2.to_k.bias": { + "min": -3.6502017974853516, + "max": 3.2850754261016846, + "mean": -0.014260413125157356, + "std": 0.9845133423805237, + "abs_mean": 0.7433228492736816, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 31.492347717285156, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 6, + 12, + 14, + 23, + 46, + 69, + 101, + 127, + 189, + 159, + 89, + 59, + 44, + 21, + 24, + 9, + 3, + 2 + ], + "bin_edges": [ + -3.6502017974853516, + -3.3034379482269287, + -2.956674098968506, + -2.609910249710083, + -2.26314640045166, + -1.9163825511932373, + -1.5696187019348145, + -1.2228548526763916, + -0.8760910034179688, + -0.5293271541595459, + -0.18256330490112305, + 0.1642005443572998, + 0.5109643936157227, + 0.8577280044555664, + 1.2044920921325684, + 1.5512561798095703, + 1.898019790649414, + 2.244783401489258, + 2.5915474891662598, + 2.9383115768432617, + 3.2850754261016846 + ] + } + }, + "transformer.layers.13.2.to_v.weight": { + "min": -0.2349099963903427, + "max": 0.2473423033952713, + "mean": -1.7784623196348548e-05, + "std": 0.04170290008187294, + "abs_mean": 0.03274451196193695, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 42.70320510864258, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 5, + 9, + 26, + 36, + 91, + 134, + 145, + 163, + 132, + 107, + 75, + 43, + 18, + 8, + 2, + 2, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.13875167071819305, + -0.12214882671833038, + -0.10554599016904831, + -0.08894315361976624, + -0.07234030961990356, + -0.055737465620040894, + -0.03913462907075882, + -0.022531792521476746, + -0.005928948521614075, + 0.010673895478248596, + 0.027276739478111267, + 0.043879568576812744, + 0.060482412576675415, + 0.07708525657653809, + 0.09368808567523956, + 0.11029092967510223, + 0.1268937736749649, + 0.14349661767482758, + 0.16009946167469025, + 0.17670230567455292, + 0.1933051198720932 + ] + } + }, + "transformer.layers.13.2.to_v.bias": { + "min": -0.07268015295267105, + "max": 0.1542970985174179, + "mean": 0.000663664482999593, + "std": 0.02515619620680809, + "abs_mean": 0.01977265253663063, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.8048853874206543, + "elements": 1024, + "histogram": { + "counts": [ + 7, + 13, + 32, + 81, + 110, + 180, + 191, + 142, + 117, + 69, + 36, + 18, + 2, + 1, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.07268015295267105, + -0.06133129075169563, + -0.049982428550720215, + -0.0386335626244545, + -0.02728470042347908, + -0.015935838222503662, + -0.0045869722962379456, + 0.006761886179447174, + 0.01811075210571289, + 0.029459618031978607, + 0.04080847650766373, + 0.05215734243392944, + 0.06350620836019516, + 0.07485506683588028, + 0.0862039253115654, + 0.09755279868841171, + 0.10890165716409683, + 0.12025051563978195, + 0.13159939646720886, + 0.1429482400417328, + 0.1542970985174179 + ] + } + }, + "transformer.layers.13.2.to_out.0.weight": { + "min": -0.2664458751678467, + "max": 0.2483866959810257, + "mean": -1.5342577171395533e-05, + "std": 0.040143273770809174, + "abs_mean": 0.03165973350405693, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.10619354248047, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 4, + 6, + 11, + 19, + 50, + 74, + 109, + 151, + 153, + 150, + 99, + 78, + 51, + 29, + 11, + 1, + 2, + 1 + ], + "bin_edges": [ + -0.1581651270389557, + -0.14322325587272644, + -0.1282813996076584, + -0.11333952844142914, + -0.09839766472578049, + -0.08345580101013184, + -0.06851392984390259, + -0.05357206612825394, + -0.038630202412605286, + -0.023688331246376038, + -0.008746474981307983, + 0.006195396184921265, + 0.021137267351150513, + 0.03607912361621857, + 0.051020994782447815, + 0.06596285104751587, + 0.08090472221374512, + 0.09584659337997437, + 0.11078846454620361, + 0.12573030591011047, + 0.14067217707633972 + ] + } + }, + "transformer.layers.13.2.to_out.0.bias": { + "min": -0.18931904435157776, + "max": 0.19443899393081665, + "mean": -0.0012288358993828297, + "std": 0.06666287034749985, + "abs_mean": 0.054270241409540176, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.1325325965881348, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 7, + 14, + 25, + 33, + 55, + 81, + 100, + 107, + 101, + 108, + 88, + 80, + 67, + 55, + 42, + 18, + 11, + 3, + 2 + ], + "bin_edges": [ + -0.18931904435157776, + -0.1701311469078064, + -0.15094324946403503, + -0.13175533711910248, + -0.11256743967533112, + -0.09337954223155975, + -0.0741916373372078, + -0.055003732442855835, + -0.03581583499908447, + -0.01662793755531311, + 0.002559959888458252, + 0.021747872233390808, + 0.04093576967716217, + 0.06012366712093353, + 0.07931157946586609, + 0.09849947690963745, + 0.11768737435340881, + 0.13687527179718018, + 0.15606316924095154, + 0.1752510666847229, + 0.19443899393081665 + ] + } + }, + "transformer.layers.13.3.g": { + "min": 0.32919859886169434, + "max": 0.997564435005188, + "mean": 0.7190552949905396, + "std": 0.051983967423439026, + "abs_mean": 0.7190552949905396, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 23.069761276245117, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 1, + 4, + 3, + 17, + 71, + 220, + 309, + 224, + 91, + 35, + 15, + 2, + 4, + 1, + 2 + ], + "bin_edges": [ + 0.32919859886169434, + 0.3626168966293335, + 0.39603519439697266, + 0.42945346236228943, + 0.4628717601299286, + 0.49629005789756775, + 0.5297083258628845, + 0.5631266236305237, + 0.5965449213981628, + 0.629963219165802, + 0.6633815169334412, + 0.6967997550964355, + 0.7302180528640747, + 0.7636363506317139, + 0.797054648399353, + 0.8304729461669922, + 0.8638912439346313, + 0.8973095417022705, + 0.9307278394699097, + 0.9641461372375488, + 0.997564435005188 + ] + } + }, + "transformer.layers.13.4.ff.0.0.weight": { + "min": -0.2313733994960785, + "max": 0.24550800025463104, + "mean": 0.00018263014499098063, + "std": 0.04090628772974014, + "abs_mean": 0.03251039609313011, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.76873016357422, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 11, + 15, + 27, + 49, + 59, + 95, + 121, + 137, + 122, + 118, + 106, + 67, + 31, + 23, + 13, + 4 + ], + "bin_edges": [ + -0.16133558750152588, + -0.1475898176431656, + -0.1338440328836441, + -0.12009826302528381, + -0.10635248571634293, + -0.09260670840740204, + -0.07886093854904175, + -0.06511516124010086, + -0.05136938393115997, + -0.037623606622219086, + -0.023877829313278198, + -0.010132059454917908, + 0.003613710403442383, + 0.017359495162963867, + 0.031105265021324158, + 0.04485104978084564, + 0.05859681963920593, + 0.07234258949756622, + 0.08608837425708771, + 0.099834144115448, + 0.11357992142438889 + ] + } + }, + "transformer.layers.13.4.ff.0.0.bias": { + "min": -0.11402574181556702, + "max": 0.018650896847248077, + "mean": -0.0424647182226181, + "std": 0.0188254714012146, + "abs_mean": 0.042548101395368576, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.972773313522339, + "elements": 4096, + "histogram": { + "counts": [ + 4, + 1, + 7, + 10, + 19, + 36, + 56, + 76, + 113, + 141, + 125, + 148, + 100, + 78, + 50, + 19, + 10, + 3, + 3, + 1 + ], + "bin_edges": [ + -0.10659940540790558, + -0.10033688694238663, + -0.09407437592744827, + -0.08781185746192932, + -0.08154934644699097, + -0.07528682798147202, + -0.06902430951595306, + -0.06276179850101471, + -0.05649928003549576, + -0.050236765295267105, + -0.04397425055503845, + -0.0377117320895195, + -0.03144921362400055, + -0.025186702609062195, + -0.018924184143543243, + -0.012661673128604889, + -0.0063991546630859375, + -0.00013663619756698608, + 0.006125874817371368, + 0.01238839328289032, + 0.018650896847248077 + ] + } + }, + "transformer.layers.13.4.ff.2.weight": { + "min": -0.3894314467906952, + "max": 0.4067791998386383, + "mean": -2.1846279196324758e-05, + "std": 0.048540692776441574, + "abs_mean": 0.03770503029227257, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 99.40045928955078, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 2, + 1, + 15, + 20, + 39, + 79, + 146, + 184, + 194, + 126, + 93, + 57, + 21, + 14, + 5, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.19731265306472778, + -0.17545032501220703, + -0.15358801186084747, + -0.13172568380832672, + -0.10986336320638657, + -0.08800104260444641, + -0.06613871455192566, + -0.0442764014005661, + -0.02241407334804535, + -0.0005517452955245972, + 0.02131056785583496, + 0.04317289590835571, + 0.06503522396087646, + 0.08689755201339722, + 0.10875985026359558, + 0.13062217831611633, + 0.15248450636863708, + 0.17434683442115784, + 0.1962091624736786, + 0.21807146072387695, + 0.2399337887763977 + ] + } + }, + "transformer.layers.13.4.ff.2.bias": { + "min": -0.692162811756134, + "max": 0.4120035469532013, + "mean": 0.000852768833283335, + "std": 0.060242246836423874, + "abs_mean": 0.039657142013311386, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.9270035028457642, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 4, + 34, + 200, + 463, + 246, + 39, + 7, + 1, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.692162811756134, + -0.6369544863700867, + -0.5817461609840393, + -0.5265378952026367, + -0.47132954001426697, + -0.4161212146282196, + -0.36091291904449463, + -0.30570459365844727, + -0.2504962682723999, + -0.19528794288635254, + -0.14007961750030518, + -0.08487129211425781, + -0.029663026332855225, + 0.02554529905319214, + 0.0807536244392395, + 0.13596194982528687, + 0.19117027521133423, + 0.2463786005973816, + 0.30158692598342896, + 0.3567952513694763, + 0.4120035469532013 + ] + } + }, + "transformer.layers.14.0.weight": { + "min": 0.0, + "max": 1.0, + "mean": 0.00048828125, + "std": 0.0220916960388422, + "abs_mean": 0.00048828125, + "sparsity": 0.99951171875, + "shape": [ + 1024, + 2048 + ], + "norm": 32.0, + "elements": 2097152, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.1.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "abs_mean": 1.0, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + 0.5, + 0.550000011920929, + 0.6000000238418579, + 0.6499999761581421, + 0.699999988079071, + 0.75, + 0.800000011920929, + 0.8500000238418579, + 0.8999999761581421, + 0.9500000476837158, + 1.0, + 1.0499999523162842, + 1.100000023841858, + 1.1500000953674316, + 1.2000000476837158, + 1.25, + 1.2999999523162842, + 1.350000023841858, + 1.4000000953674316, + 1.4500000476837158, + 1.5 + ] + } + }, + "transformer.layers.14.2.to_q.weight": { + "min": -0.031249970197677612, + "max": 0.031249817460775375, + "mean": -2.1022657165303826e-05, + "std": 0.018035436049103737, + "abs_mean": 0.015622841194272041, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 18.46811866760254, + "elements": 1048576, + "histogram": { + "counts": [ + 66, + 57, + 51, + 56, + 44, + 39, + 44, + 44, + 63, + 52, + 42, + 43, + 49, + 41, + 49, + 40, + 48, + 58, + 58, + 56 + ], + "bin_edges": [ + -0.03121861070394516, + -0.028114574030041695, + -0.02501053735613823, + -0.021906500682234764, + -0.0188024640083313, + -0.015698427334427834, + -0.012594390660524368, + -0.009490353986620903, + -0.006386317312717438, + -0.0032822806388139725, + -0.0001782439649105072, + 0.002925790846347809, + 0.006029829382896423, + 0.009133867919445038, + 0.012237902730703354, + 0.01534193754196167, + 0.018445976078510284, + 0.0215500146150589, + 0.024654049426317215, + 0.02775808423757553, + 0.030862122774124146 + ] + } + }, + "transformer.layers.14.2.to_q.bias": { + "min": -0.03122086077928543, + "max": 0.031233571469783783, + "mean": -0.0006771883927285671, + "std": 0.01782997138798237, + "abs_mean": 0.015417349524796009, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5706920027732849, + "elements": 1024, + "histogram": { + "counts": [ + 52, + 41, + 59, + 51, + 59, + 57, + 46, + 54, + 44, + 58, + 42, + 60, + 52, + 45, + 51, + 45, + 47, + 45, + 46, + 46 + ], + "bin_edges": [ + -0.03122086077928543, + -0.02809813991189003, + -0.02497541718184948, + -0.02185269631445408, + -0.01872997358441353, + -0.015607252717018127, + -0.012484531849622726, + -0.009361809119582176, + -0.006239088252186775, + -0.003116367384791374, + 6.355345249176025e-06, + 0.003129076212644577, + 0.006251797080039978, + 0.009374517947435379, + 0.012497242540121078, + 0.01561996340751648, + 0.01874268427491188, + 0.02186540514230728, + 0.024988126009702682, + 0.028110850602388382, + 0.031233571469783783 + ] + } + }, + "transformer.layers.14.2.to_k.weight": { + "min": -0.03124987706542015, + "max": 0.031249921768903732, + "mean": -8.839062502374873e-06, + "std": 0.01803446188569069, + "abs_mean": 0.015615805983543396, + "sparsity": 9.5367431640625e-07, + "shape": [ + 1024, + 1024 + ], + "norm": 18.467140197753906, + "elements": 1048576, + "histogram": { + "counts": [ + 56, + 50, + 49, + 51, + 53, + 57, + 44, + 69, + 39, + 45, + 39, + 46, + 61, + 42, + 49, + 49, + 49, + 56, + 39, + 57 + ], + "bin_edges": [ + -0.03123004361987114, + -0.02810853160917759, + -0.02498701959848404, + -0.02186550945043564, + -0.01874399743974209, + -0.015622485429048538, + -0.012500975281000137, + -0.009379463270306587, + -0.006257951259613037, + -0.003136439248919487, + -1.492723822593689e-05, + 0.003106582909822464, + 0.006228093057870865, + 0.009349606931209564, + 0.012471117079257965, + 0.015592630952596664, + 0.018714141100645065, + 0.021835651248693466, + 0.024957165122032166, + 0.028078675270080566, + 0.031200189143419266 + ] + } + }, + "transformer.layers.14.2.to_k.bias": { + "min": -0.031232360750436783, + "max": 0.031245984137058258, + "mean": -0.0007298353011719882, + "std": 0.017944591119885445, + "abs_mean": 0.015577686950564384, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5744214653968811, + "elements": 1024, + "histogram": { + "counts": [ + 50, + 52, + 50, + 43, + 54, + 69, + 62, + 40, + 54, + 49, + 56, + 38, + 54, + 50, + 47, + 43, + 42, + 54, + 40, + 53 + ], + "bin_edges": [ + -0.031232360750436783, + -0.028108444064855576, + -0.02498452737927437, + -0.021860608831048012, + -0.018736692145466805, + -0.015612775459885597, + -0.01248885691165924, + -0.009364940226078033, + -0.006241023540496826, + -0.003117106854915619, + 6.809830665588379e-06, + 0.0031307265162467957, + 0.006254646927118301, + 0.009378563612699509, + 0.012502480298280716, + 0.015626396983861923, + 0.01875031366944313, + 0.021874230355024338, + 0.024998147040605545, + 0.028122063726186752, + 0.031245984137058258 + ] + } + }, + "transformer.layers.14.2.to_v.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ], + "norm": 0.0, + "elements": 1048576, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.2.to_v.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.2.to_out.0.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 1024 + ], + "norm": 0.0, + "elements": 1048576, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.2.to_out.0.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.3.g": { + "min": 1.0, + "max": 1.0, + "mean": 1.0, + "std": 0.0, + "abs_mean": 1.0, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + 0.5, + 0.550000011920929, + 0.6000000238418579, + 0.6499999761581421, + 0.699999988079071, + 0.75, + 0.800000011920929, + 0.8500000238418579, + 0.8999999761581421, + 0.9500000476837158, + 1.0, + 1.0499999523162842, + 1.100000023841858, + 1.1500000953674316, + 1.2000000476837158, + 1.25, + 1.2999999523162842, + 1.350000023841858, + 1.4000000953674316, + 1.4500000476837158, + 1.5 + ] + } + }, + "transformer.layers.14.4.ff.0.0.weight": { + "min": -0.03125, + "max": 0.031249988824129105, + "mean": 3.591749646147946e-06, + "std": 0.018040824681520462, + "abs_mean": 0.015623635612428188, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 36.945003509521484, + "elements": 4194304, + "histogram": { + "counts": [ + 44, + 44, + 36, + 44, + 48, + 49, + 52, + 50, + 49, + 51, + 55, + 40, + 56, + 52, + 54, + 46, + 67, + 56, + 45, + 62 + ], + "bin_edges": [ + -0.03122270107269287, + -0.028100185096263885, + -0.0249776691198349, + -0.021855153143405914, + -0.01873263716697693, + -0.015610120259225368, + -0.012487603351473808, + -0.009365087375044823, + -0.006242571398615837, + -0.0031200554221868515, + 2.4605542421340942e-06, + 0.003124978393316269, + 0.0062474943697452545, + 0.00937001034617424, + 0.012492526322603226, + 0.015615042299032211, + 0.018737558275461197, + 0.021860074251890182, + 0.024982590228319168, + 0.028105106204748154, + 0.03122762218117714 + ] + } + }, + "transformer.layers.14.4.ff.0.0.bias": { + "min": -0.031234480440616608, + "max": 0.031246982514858246, + "mean": 0.0001957040512934327, + "std": 0.018076537176966667, + "abs_mean": 0.015660608187317848, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 1.1568249464035034, + "elements": 4096, + "histogram": { + "counts": [ + 46, + 33, + 53, + 46, + 59, + 36, + 56, + 53, + 52, + 38, + 53, + 51, + 55, + 49, + 48, + 43, + 44, + 64, + 55, + 66 + ], + "bin_edges": [ + -0.031234480440616608, + -0.028114622458815575, + -0.02499476447701454, + -0.021874908357858658, + -0.018755050376057625, + -0.015635192394256592, + -0.012515336275100708, + -0.009395478293299675, + -0.006275620311498642, + -0.003155762329697609, + -3.590434789657593e-05, + 0.003083951771259308, + 0.006203807890415192, + 0.009323667734861374, + 0.012443523854017258, + 0.01556338369846344, + 0.018683239817619324, + 0.021803095936775208, + 0.02492295578122139, + 0.028042811900377274, + 0.031162668019533157 + ] + } + }, + "transformer.layers.14.4.ff.2.weight": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024, + 4096 + ], + "norm": 0.0, + "elements": 4194304, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.14.4.ff.2.bias": { + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + "abs_mean": 0.0, + "sparsity": 1.0, + "shape": [ + 1024 + ], + "norm": 0.0, + "elements": 1024, + "histogram": { + "counts": [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1000, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0 + ], + "bin_edges": [ + -0.5, + -0.44999998807907104, + -0.4000000059604645, + -0.3499999940395355, + -0.30000001192092896, + -0.25, + -0.19999998807907104, + -0.15000000596046448, + -0.09999999403953552, + -0.04999998211860657, + 0.0, + 0.050000011920928955, + 0.10000002384185791, + 0.15000003576278687, + 0.19999998807907104, + 0.25, + 0.30000001192092896, + 0.3500000238418579, + 0.40000003576278687, + 0.44999998807907104, + 0.5 + ] + } + }, + "transformer.layers.15.0.weight": { + "min": -0.23450319468975067, + "max": 0.2724616229534149, + "mean": 6.948144346097251e-06, + "std": 0.01881224475800991, + "abs_mean": 0.014990497380495071, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 27.24210548400879, + "elements": 2097152, + "histogram": { + "counts": [ + 2, + 2, + 7, + 10, + 26, + 53, + 66, + 75, + 121, + 125, + 127, + 119, + 78, + 77, + 46, + 34, + 19, + 8, + 4, + 1 + ], + "bin_edges": [ + -0.06102960929274559, + -0.05501559376716614, + -0.049001578241586685, + -0.04298756271600723, + -0.03697354719042778, + -0.030959531664848328, + -0.024945516139268875, + -0.018931500613689423, + -0.01291748508810997, + -0.006903469562530518, + -0.0008894540369510651, + 0.005124557763338089, + 0.01113857701420784, + 0.01715259626507759, + 0.023166608065366745, + 0.0291806198656559, + 0.03519463911652565, + 0.0412086583673954, + 0.047222670167684555, + 0.05323668196797371, + 0.05925070121884346 + ] + } + }, + "transformer.layers.15.1.g": { + "min": 0.32128843665122986, + "max": 0.6922435760498047, + "mean": 0.5815606117248535, + "std": 0.045744746923446655, + "abs_mean": 0.5815606117248535, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 18.6673641204834, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 0, + 2, + 2, + 5, + 10, + 8, + 10, + 20, + 37, + 53, + 112, + 177, + 186, + 214, + 95, + 39, + 20, + 7 + ], + "bin_edges": [ + 0.32128843665122986, + 0.3398361802101135, + 0.3583839535713196, + 0.37693169713020325, + 0.3954794704914093, + 0.41402721405029297, + 0.432574987411499, + 0.4511227309703827, + 0.46967047452926636, + 0.4882182478904724, + 0.5067660212516785, + 0.5253137350082397, + 0.5438615083694458, + 0.5624092817306519, + 0.5809570550918579, + 0.599504828453064, + 0.6180525422096252, + 0.6366002559661865, + 0.6551480293273926, + 0.6736958026885986, + 0.6922435760498047 + ] + } + }, + "transformer.layers.15.2.to_q.weight": { + "min": -0.18168264627456665, + "max": 0.1974717229604721, + "mean": -1.171275016531581e-05, + "std": 0.03318728506565094, + "abs_mean": 0.026278000324964523, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.98333740234375, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 7, + 14, + 23, + 32, + 46, + 77, + 125, + 120, + 132, + 122, + 97, + 84, + 45, + 39, + 19, + 6, + 6, + 2 + ], + "bin_edges": [ + -0.10859622806310654, + -0.0980859100818634, + -0.08757558465003967, + -0.07706526666879654, + -0.0665549486875534, + -0.05604463070631027, + -0.04553430527448654, + -0.03502398729324341, + -0.024513669312000275, + -0.014003351330757141, + -0.0034930333495140076, + 0.007017292082309723, + 0.017527617514133453, + 0.02803792804479599, + 0.03854825347661972, + 0.04905856400728226, + 0.05956888943910599, + 0.07007921487092972, + 0.08058952540159225, + 0.09109985083341599, + 0.10161017626523972 + ] + } + }, + "transformer.layers.15.2.to_q.bias": { + "min": -0.16043128073215485, + "max": 0.1292782723903656, + "mean": -0.0010662535205483437, + "std": 0.034117527306079865, + "abs_mean": 0.02523173578083515, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.0917609930038452, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 4, + 5, + 5, + 3, + 23, + 38, + 79, + 131, + 195, + 200, + 147, + 91, + 36, + 17, + 13, + 5, + 3, + 2 + ], + "bin_edges": [ + -0.16043128073215485, + -0.14594580233097076, + -0.13146032392978668, + -0.1169748455286026, + -0.10248936712741852, + -0.08800388872623444, + -0.07351841032505035, + -0.05903293192386627, + -0.04454745352268219, + -0.030061975121498108, + -0.015576496720314026, + -0.0010910183191299438, + 0.013394460082054138, + 0.02787993848323822, + 0.0423654168844223, + 0.056850895285606384, + 0.07133637368679047, + 0.08582185208797455, + 0.10030733048915863, + 0.11479280889034271, + 0.1292782723903656 + ] + } + }, + "transformer.layers.15.2.to_k.weight": { + "min": -0.3318951725959778, + "max": 0.31116846203804016, + "mean": -1.0326401024940424e-05, + "std": 0.03223801404237747, + "abs_mean": 0.025547126308083534, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 33.01129913330078, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 7, + 9, + 25, + 46, + 66, + 86, + 90, + 118, + 132, + 116, + 82, + 72, + 62, + 38, + 25, + 11, + 3, + 6, + 3 + ], + "bin_edges": [ + -0.09133105725049973, + -0.08163873851299286, + -0.07194641977548599, + -0.06225409731268883, + -0.05256177857518196, + -0.042869459837675095, + -0.03317713737487793, + -0.023484818637371063, + -0.013792499899864197, + -0.00410018116235733, + 0.005592137575149536, + 0.015284456312656403, + 0.024976782500743866, + 0.034669093787670135, + 0.0443614199757576, + 0.05405373126268387, + 0.06374605745077133, + 0.0734383836388588, + 0.08313069492578506, + 0.09282302111387253, + 0.1025153324007988 + ] + } + }, + "transformer.layers.15.2.to_k.bias": { + "min": -7.791203022003174, + "max": 8.74953842163086, + "mean": 0.09337067604064941, + "std": 1.61784029006958, + "abs_mean": 0.9389018416404724, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 51.8317985534668, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 6, + 4, + 6, + 3, + 3, + 22, + 55, + 205, + 417, + 170, + 54, + 21, + 6, + 4, + 7, + 4, + 5, + 1, + 6 + ], + "bin_edges": [ + -7.791203022003174, + -6.964166164398193, + -6.137128829956055, + -5.310091972351074, + -4.483055114746094, + -3.656017780303955, + -2.8289809226989746, + -2.001943588256836, + -1.1749067306518555, + -0.347869873046875, + 0.47916746139526367, + 1.3062043190002441, + 2.1332411766052246, + 2.960278034210205, + 3.787315845489502, + 4.614352703094482, + 5.441389560699463, + 6.268426418304443, + 7.095463275909424, + 7.922501087188721, + 8.74953842163086 + ] + } + }, + "transformer.layers.15.2.to_v.weight": { + "min": -0.23363685607910156, + "max": 0.24183623492717743, + "mean": 4.133234324399382e-05, + "std": 0.0408620610833168, + "abs_mean": 0.03202153369784355, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.84220504760742, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 4, + 2, + 5, + 11, + 20, + 30, + 65, + 87, + 106, + 137, + 143, + 126, + 93, + 67, + 46, + 23, + 18, + 10, + 5 + ], + "bin_edges": [ + -0.14702486991882324, + -0.13389593362808228, + -0.12076699733734131, + -0.10763806104660034, + -0.09450912475585938, + -0.08138018846511841, + -0.06825125217437744, + -0.055122315883636475, + -0.04199337959289551, + -0.02886444330215454, + -0.015735507011413574, + -0.0026065707206726074, + 0.01052236557006836, + 0.023651301860809326, + 0.03678023815155029, + 0.04990917444229126, + 0.06303811073303223, + 0.0761670470237732, + 0.08929598331451416, + 0.10242491960525513, + 0.11555387079715729 + ] + } + }, + "transformer.layers.15.2.to_v.bias": { + "min": -0.07588791847229004, + "max": 0.0656837597489357, + "mean": 0.00047856790479272604, + "std": 0.01940334029495716, + "abs_mean": 0.015272315591573715, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.6207925081253052, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 0, + 2, + 4, + 7, + 23, + 36, + 61, + 117, + 128, + 143, + 143, + 128, + 96, + 56, + 22, + 14, + 11, + 4, + 1 + ], + "bin_edges": [ + -0.07588791847229004, + -0.06880933791399002, + -0.061730749905109406, + -0.05465216934680939, + -0.04757358506321907, + -0.040495000779628754, + -0.033416420221328735, + -0.02633783593773842, + -0.019259251654148102, + -0.012180671095848083, + -0.005102083086967468, + 0.00197649747133255, + 0.009055078029632568, + 0.016133666038513184, + 0.023212246596813202, + 0.030290834605693817, + 0.037369415163993835, + 0.044447995722293854, + 0.05152657628059387, + 0.05860516428947449, + 0.0656837597489357 + ] + } + }, + "transformer.layers.15.2.to_out.0.weight": { + "min": -0.2455652505159378, + "max": 0.2337566763162613, + "mean": -2.8880367608508095e-06, + "std": 0.03943672403693199, + "abs_mean": 0.031019341200590134, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.38263702392578, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 2, + 5, + 9, + 23, + 52, + 99, + 158, + 183, + 167, + 134, + 87, + 44, + 22, + 7, + 4, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.17114542424678802, + -0.15392161905765533, + -0.13669782876968384, + -0.11947402358055115, + -0.10225021839141846, + -0.08502641320228577, + -0.06780261546373367, + -0.05057881772518158, + -0.03335501253604889, + -0.0161312073469162, + 0.0010925978422164917, + 0.01831638813018799, + 0.03554019331932068, + 0.05276399850845337, + 0.06998778879642487, + 0.08721159398555756, + 0.10443539917469025, + 0.12165920436382294, + 0.13888300955295563, + 0.15610681474208832, + 0.1733306348323822 + ] + } + }, + "transformer.layers.15.2.to_out.0.bias": { + "min": -0.16261433064937592, + "max": 0.1605682373046875, + "mean": 0.0016338212881237268, + "std": 0.06525633484125137, + "abs_mean": 0.054562319070100784, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.0878376960754395, + "elements": 1024, + "histogram": { + "counts": [ + 8, + 11, + 23, + 35, + 34, + 49, + 76, + 81, + 85, + 70, + 68, + 99, + 91, + 82, + 63, + 52, + 39, + 19, + 10, + 5 + ], + "bin_edges": [ + -0.16261433064937592, + -0.14645519852638245, + -0.13029608130455017, + -0.1141369491815567, + -0.09797781705856323, + -0.08181868493556976, + -0.06565956026315689, + -0.04950043559074402, + -0.03334130346775055, + -0.01718217134475708, + -0.0010230392217636108, + 0.015136078000068665, + 0.031295210123062134, + 0.0474543422460556, + 0.06361345946788788, + 0.07977259159088135, + 0.09593172371387482, + 0.11209084093570709, + 0.12824998795986176, + 0.14440910518169403, + 0.1605682373046875 + ] + } + }, + "transformer.layers.15.3.g": { + "min": 0.5568146705627441, + "max": 0.9421050548553467, + "mean": 0.7127699851989746, + "std": 0.03979077190160751, + "abs_mean": 0.7127699851989746, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.8441219329834, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 1, + 1, + 12, + 29, + 85, + 155, + 213, + 205, + 153, + 70, + 38, + 15, + 10, + 3, + 2, + 2, + 2, + 0, + 1 + ], + "bin_edges": [ + 0.5568146705627441, + 0.5760791897773743, + 0.5953437089920044, + 0.6146082282066345, + 0.6338727474212646, + 0.6531372666358948, + 0.6724017858505249, + 0.691666305065155, + 0.7109308242797852, + 0.7301953434944153, + 0.7494598627090454, + 0.7687243819236755, + 0.7879889011383057, + 0.8072534203529358, + 0.8265179395675659, + 0.845782458782196, + 0.8650469779968262, + 0.8843114972114563, + 0.9035760164260864, + 0.9228405356407166, + 0.9421050548553467 + ] + } + }, + "transformer.layers.15.4.ff.0.0.weight": { + "min": -0.22831875085830688, + "max": 0.2548784911632538, + "mean": -4.536488631856628e-05, + "std": 0.040581412613391876, + "abs_mean": 0.03228280693292618, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.1028060913086, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 1, + 3, + 12, + 23, + 39, + 67, + 94, + 133, + 127, + 128, + 104, + 88, + 78, + 44, + 26, + 17, + 10, + 3, + 2 + ], + "bin_edges": [ + -0.1346137970685959, + -0.12144477665424347, + -0.10827574878931046, + -0.09510672837495804, + -0.08193770051002502, + -0.06876868009567261, + -0.05559965968132019, + -0.042430631816387177, + -0.02926161140203476, + -0.016092590987682343, + -0.0029235631227493286, + 0.010245457291603088, + 0.023414477705955505, + 0.03658349812030792, + 0.04975253343582153, + 0.06292155385017395, + 0.07609057426452637, + 0.08925959467887878, + 0.1024286150932312, + 0.11559765040874481, + 0.12876667082309723 + ] + } + }, + "transformer.layers.15.4.ff.0.0.bias": { + "min": -0.13459284603595734, + "max": 0.02228192612528801, + "mean": -0.04134010896086693, + "std": 0.018355557695031166, + "abs_mean": 0.04144716635346413, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.8947877883911133, + "elements": 4096, + "histogram": { + "counts": [ + 4, + 2, + 7, + 22, + 26, + 36, + 74, + 95, + 136, + 125, + 122, + 115, + 98, + 61, + 46, + 15, + 8, + 5, + 2, + 1 + ], + "bin_edges": [ + -0.10183046758174896, + -0.09562484920024872, + -0.08941923081874847, + -0.08321361243724823, + -0.07700798660516739, + -0.07080236822366714, + -0.0645967498421669, + -0.05839112773537636, + -0.052185509353876114, + -0.04597989097237587, + -0.03977426886558533, + -0.03356865048408508, + -0.02736303210258484, + -0.021157413721084595, + -0.014951787889003754, + -0.00874616950750351, + -0.0025405511260032654, + 0.0036650672554969788, + 0.009870685636997223, + 0.016076311469078064, + 0.02228192612528801 + ] + } + }, + "transformer.layers.15.4.ff.2.weight": { + "min": -0.4211236536502838, + "max": 0.3922184407711029, + "mean": -4.3558138713706285e-06, + "std": 0.04779110848903656, + "abs_mean": 0.03745824098587036, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 97.86603546142578, + "elements": 4194304, + "histogram": { + "counts": [ + 3, + 1, + 3, + 13, + 21, + 47, + 78, + 120, + 132, + 160, + 127, + 128, + 81, + 44, + 28, + 9, + 2, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.1714564561843872, + -0.15337276458740234, + -0.13528907299041748, + -0.11720538139343262, + -0.09912168979644775, + -0.08103799819946289, + -0.06295430660247803, + -0.044870615005493164, + -0.0267869234085083, + -0.008703231811523438, + 0.009380459785461426, + 0.02746415138244629, + 0.04554784297943115, + 0.06363153457641602, + 0.08171522617340088, + 0.09979891777038574, + 0.1178826093673706, + 0.13596630096435547, + 0.15404999256134033, + 0.1721336841583252, + 0.19021736085414886 + ] + } + }, + "transformer.layers.15.4.ff.2.bias": { + "min": -0.6065256595611572, + "max": 0.6503778696060181, + "mean": 0.0015810506884008646, + "std": 0.05679204687476158, + "abs_mean": 0.039021797478199005, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.8171623945236206, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 2, + 10, + 177, + 487, + 280, + 38, + 1, + 3, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.6065256595611572, + -0.5436804890632629, + -0.48083531856536865, + -0.417990118265152, + -0.3551449477672577, + -0.2922997772693634, + -0.22945457696914673, + -0.16660940647125244, + -0.10376423597335815, + -0.04091906547546387, + 0.02192610502243042, + 0.08477127552032471, + 0.14761650562286377, + 0.21046167612075806, + 0.27330684661865234, + 0.33615201711654663, + 0.3989971876144409, + 0.46184241771698, + 0.5246875286102295, + 0.5875327587127686, + 0.6503778696060181 + ] + } + }, + "transformer.layers.16.0.weight": { + "min": -0.2516687214374542, + "max": 0.3206498920917511, + "mean": -6.057634891476482e-06, + "std": 0.0196156594902277, + "abs_mean": 0.015561186708509922, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 28.40553855895996, + "elements": 2097152, + "histogram": { + "counts": [ + 1, + 0, + 2, + 3, + 6, + 19, + 34, + 76, + 107, + 136, + 142, + 149, + 134, + 91, + 52, + 28, + 10, + 6, + 3, + 1 + ], + "bin_edges": [ + -0.08025113493204117, + -0.07286576926708221, + -0.06548041105270386, + -0.058095045387744904, + -0.05070968344807625, + -0.04332432150840759, + -0.03593895584344864, + -0.028553593903779984, + -0.021168231964111328, + -0.013782866299152374, + -0.006397508084774017, + 0.0009878575801849365, + 0.00837322324514389, + 0.015758581459522247, + 0.0231439471244812, + 0.030529305338859558, + 0.03791467100381851, + 0.04530002921819687, + 0.05268540233373642, + 0.06007076054811478, + 0.06745612621307373 + ] + } + }, + "transformer.layers.16.1.g": { + "min": 0.35995498299598694, + "max": 0.6810278296470642, + "mean": 0.5706292986869812, + "std": 0.042767371982336044, + "abs_mean": 0.5706292986869812, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 18.31130027770996, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 2, + 2, + 7, + 5, + 14, + 18, + 21, + 33, + 56, + 113, + 146, + 159, + 175, + 162, + 56, + 17, + 9, + 2 + ], + "bin_edges": [ + 0.35995498299598694, + 0.37600862979888916, + 0.3920622766017914, + 0.4081159234046936, + 0.42416954040527344, + 0.44022321701049805, + 0.4562768340110779, + 0.4723304808139801, + 0.4883841276168823, + 0.5044378042221069, + 0.5204914212226868, + 0.5365450382232666, + 0.5525987148284912, + 0.568652331829071, + 0.5847059488296509, + 0.6007596254348755, + 0.6168133020401001, + 0.6328669190406799, + 0.6489205360412598, + 0.6649742126464844, + 0.6810278296470642 + ] + } + }, + "transformer.layers.16.2.to_q.weight": { + "min": -0.22037938237190247, + "max": 0.1769036501646042, + "mean": -3.467117130639963e-05, + "std": 0.03430242836475372, + "abs_mean": 0.027101609855890274, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.12527847290039, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 6, + 16, + 16, + 32, + 40, + 61, + 99, + 140, + 142, + 108, + 116, + 83, + 68, + 30, + 20, + 14, + 5, + 1, + 1 + ], + "bin_edges": [ + -0.10248468071222305, + -0.09197874367237091, + -0.08147279918193817, + -0.07096686214208603, + -0.06046092137694359, + -0.04995498061180115, + -0.039449043571949005, + -0.028943099081516266, + -0.018437162041664124, + -0.007931225001811981, + 0.002574719488620758, + 0.0130806565284729, + 0.023586593568325043, + 0.03409253805875778, + 0.04459848254919052, + 0.05510441213846207, + 0.0656103566288948, + 0.07611630111932755, + 0.08662223070859909, + 0.09712817519903183, + 0.10763411223888397 + ] + } + }, + "transformer.layers.16.2.to_q.bias": { + "min": -0.16339237987995148, + "max": 0.23269455134868622, + "mean": 0.00036311167059466243, + "std": 0.03283863142132759, + "abs_mean": 0.022844718769192696, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.0503872632980347, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 2, + 0, + 3, + 15, + 30, + 108, + 263, + 298, + 179, + 60, + 25, + 6, + 1, + 3, + 0, + 1, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.16339237987995148, + -0.14358803629875183, + -0.12378368526697159, + -0.10397933423519135, + -0.0841749906539917, + -0.06437064707279205, + -0.04456629604101181, + -0.024761945009231567, + -0.004957601428031921, + 0.014846742153167725, + 0.03465108573436737, + 0.05445544421672821, + 0.07425978779792786, + 0.0940641313791275, + 0.11386848986148834, + 0.1336728185415268, + 0.15347717702388763, + 0.17328153550624847, + 0.19308586418628693, + 0.21289022266864777, + 0.23269455134868622 + ] + } + }, + "transformer.layers.16.2.to_k.weight": { + "min": -0.2634328007698059, + "max": 0.23954781889915466, + "mean": -5.2383133152034134e-05, + "std": 0.03390158340334892, + "abs_mean": 0.026794826611876488, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 34.71477508544922, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 8, + 12, + 14, + 63, + 68, + 81, + 125, + 134, + 144, + 99, + 91, + 56, + 56, + 18, + 18, + 6, + 2, + 1 + ], + "bin_edges": [ + -0.11294181644916534, + -0.10158675163984299, + -0.09023168683052063, + -0.07887662947177887, + -0.06752156466245651, + -0.056166499853134155, + -0.044811442494392395, + -0.03345637768507004, + -0.02210131287574768, + -0.010746248066425323, + 0.0006088167428970337, + 0.011963874101638794, + 0.023318931460380554, + 0.03467400372028351, + 0.04602906107902527, + 0.05738413333892822, + 0.06873919069766998, + 0.08009424805641174, + 0.0914493203163147, + 0.10280437767505646, + 0.11415943503379822 + ] + } + }, + "transformer.layers.16.2.to_k.bias": { + "min": -4.847443580627441, + "max": 5.083292484283447, + "mean": 0.043835077434778214, + "std": 1.227935552597046, + "abs_mean": 0.7887641191482544, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 39.29978561401367, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 5, + 5, + 10, + 12, + 20, + 33, + 48, + 106, + 346, + 217, + 64, + 40, + 30, + 20, + 19, + 11, + 6, + 1, + 2 + ], + "bin_edges": [ + -4.847443580627441, + -4.350906848907471, + -3.854369878768921, + -3.35783314704895, + -2.8612961769104004, + -2.3647594451904297, + -1.868222713470459, + -1.3716857433319092, + -0.8751490116119385, + -0.37861204147338867, + 0.11792469024658203, + 0.6144614219665527, + 1.1109981536865234, + 1.6075348854064941, + 2.104072093963623, + 2.6006088256835938, + 3.0971455574035645, + 3.593682289123535, + 4.090219497680664, + 4.586755752563477, + 5.083292484283447 + ] + } + }, + "transformer.layers.16.2.to_v.weight": { + "min": -0.24653136730194092, + "max": 0.25027644634246826, + "mean": 7.213905337266624e-05, + "std": 0.04399324953556061, + "abs_mean": 0.034520190209150314, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 45.04863739013672, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 2, + 4, + 13, + 18, + 39, + 70, + 93, + 121, + 123, + 132, + 100, + 99, + 85, + 46, + 23, + 15, + 5, + 11 + ], + "bin_edges": [ + -0.1605098992586136, + -0.14611366391181946, + -0.13171742856502533, + -0.1173211857676506, + -0.10292494297027588, + -0.08852870762348175, + -0.07413247227668762, + -0.0597362294793129, + -0.04533999413251877, + -0.030943751335144043, + -0.016547515988349915, + -0.002151280641555786, + 0.012244954705238342, + 0.02664119005203247, + 0.04103744029998779, + 0.05543367564678192, + 0.06982991099357605, + 0.08422614634037018, + 0.0986223965883255, + 0.11301861703395844, + 0.12741486728191376 + ] + } + }, + "transformer.layers.16.2.to_v.bias": { + "min": -0.06254159659147263, + "max": 0.054444003850221634, + "mean": 0.000650427769869566, + "std": 0.017183585092425346, + "abs_mean": 0.013821554370224476, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5500001311302185, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 3, + 3, + 9, + 31, + 50, + 70, + 107, + 118, + 138, + 121, + 113, + 91, + 60, + 50, + 20, + 6, + 5, + 3 + ], + "bin_edges": [ + -0.06254159659147263, + -0.05669231712818146, + -0.05084303766489029, + -0.04499375447630882, + -0.039144475013017654, + -0.033295195549726486, + -0.02744591236114502, + -0.02159663289785385, + -0.015747353434562683, + -0.009898073971271515, + -0.004048794507980347, + 0.0018004849553108215, + 0.007649771869182587, + 0.013499051332473755, + 0.019348330795764923, + 0.02519761025905609, + 0.03104688972234726, + 0.03689616918563843, + 0.042745448648929596, + 0.048594728112220764, + 0.054444003850221634 + ] + } + }, + "transformer.layers.16.2.to_out.0.weight": { + "min": -0.28619009256362915, + "max": 0.2717132866382599, + "mean": -4.993668699171394e-05, + "std": 0.04299163073301315, + "abs_mean": 0.03383805230259895, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 44.022953033447266, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 0, + 6, + 12, + 16, + 31, + 81, + 120, + 161, + 187, + 155, + 106, + 68, + 33, + 12, + 7, + 2, + 0, + 2 + ], + "bin_edges": [ + -0.18760468065738678, + -0.16928882896900177, + -0.15097299218177795, + -0.13265714049339294, + -0.11434129625558853, + -0.09602545201778412, + -0.07770960032939911, + -0.05939376354217529, + -0.04107791185379028, + -0.022762060165405273, + -0.0044462233781814575, + 0.013869628310203552, + 0.03218547999858856, + 0.05050131678581238, + 0.0688171535730362, + 0.0871330052614212, + 0.10544885694980621, + 0.12376470863819122, + 0.14208056032657623, + 0.16039638221263885, + 0.17871224880218506 + ] + } + }, + "transformer.layers.16.2.to_out.0.bias": { + "min": -0.16040603816509247, + "max": 0.17025713622570038, + "mean": -0.0028844610787928104, + "std": 0.05926158279180527, + "abs_mean": 0.04866192489862442, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.8976906538009644, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 10, + 15, + 33, + 49, + 52, + 99, + 86, + 117, + 95, + 87, + 100, + 81, + 52, + 53, + 30, + 22, + 13, + 3, + 1 + ], + "bin_edges": [ + -0.16040603816509247, + -0.14387288689613342, + -0.12733972072601318, + -0.11080656200647354, + -0.0942734032869339, + -0.07774024456739426, + -0.061207085847854614, + -0.04467392712831497, + -0.02814076840877533, + -0.01160760223865509, + 0.004925549030303955, + 0.021458700299263, + 0.03799186646938324, + 0.05452503263950348, + 0.07105818390846252, + 0.08759133517742157, + 0.10412450134754181, + 0.12065766751766205, + 0.1371908336877823, + 0.15372397005558014, + 0.17025713622570038 + ] + } + }, + "transformer.layers.16.3.g": { + "min": 0.5196964740753174, + "max": 0.9310137629508972, + "mean": 0.7133955955505371, + "std": 0.03807961940765381, + "abs_mean": 0.7133955955505371, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.861127853393555, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 2, + 10, + 11, + 60, + 110, + 221, + 228, + 186, + 105, + 37, + 16, + 8, + 1, + 1, + 0, + 1, + 1 + ], + "bin_edges": [ + 0.5196964740753174, + 0.5402623414993286, + 0.5608282089233398, + 0.5813940763473511, + 0.6019599437713623, + 0.6225258111953735, + 0.6430916786193848, + 0.663657546043396, + 0.6842234134674072, + 0.7047892808914185, + 0.7253551483154297, + 0.7459209561347961, + 0.7664868235588074, + 0.7870526909828186, + 0.8076185584068298, + 0.8281844258308411, + 0.8487502932548523, + 0.8693161606788635, + 0.8898820281028748, + 0.910447895526886, + 0.9310137629508972 + ] + } + }, + "transformer.layers.16.4.ff.0.0.weight": { + "min": -0.23809659481048584, + "max": 0.24939550459384918, + "mean": 0.00046480150194838643, + "std": 0.04046152904629707, + "abs_mean": 0.03219496086239815, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 82.86290740966797, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 5, + 8, + 16, + 28, + 47, + 69, + 99, + 126, + 116, + 110, + 117, + 99, + 62, + 47, + 19, + 15, + 9, + 4, + 3 + ], + "bin_edges": [ + -0.12590646743774414, + -0.11311858147382736, + -0.10033069550991058, + -0.08754280209541321, + -0.07475491613149643, + -0.06196703016757965, + -0.049179136753082275, + -0.0363912507891655, + -0.023603364825248718, + -0.01081547886133194, + 0.001972407102584839, + 0.014760300517082214, + 0.02754819393157959, + 0.04033607244491577, + 0.05312396585941315, + 0.06591184437274933, + 0.0786997377872467, + 0.09148763120174408, + 0.10427550971508026, + 0.11706340312957764, + 0.12985128164291382 + ] + } + }, + "transformer.layers.16.4.ff.0.0.bias": { + "min": -0.14403879642486572, + "max": 0.041449662297964096, + "mean": -0.03967723995447159, + "std": 0.02051496133208275, + "abs_mean": 0.040096282958984375, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.85861873626709, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 4, + 8, + 12, + 23, + 41, + 98, + 144, + 181, + 170, + 132, + 79, + 60, + 21, + 12, + 8, + 5 + ], + "bin_edges": [ + -0.14403879642486572, + -0.1353476494550705, + -0.12665650248527527, + -0.11796536296606064, + -0.10927421599626541, + -0.10058306902647018, + -0.09189192950725555, + -0.08320078253746033, + -0.0745096355676651, + -0.06581848859786987, + -0.057127341628074646, + -0.048436202108860016, + -0.03974505513906479, + -0.031053908169269562, + -0.02236276865005493, + -0.013671621680259705, + -0.0049804747104644775, + 0.0037106722593307495, + 0.012401819229125977, + 0.021092966198921204, + 0.029784105718135834 + ] + } + }, + "transformer.layers.16.4.ff.2.weight": { + "min": -0.5321223735809326, + "max": 0.582199215888977, + "mean": 5.9441426856210455e-06, + "std": 0.04886837303638458, + "abs_mean": 0.038299158215522766, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 100.07161712646484, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 1, + 0, + 7, + 7, + 24, + 49, + 88, + 118, + 177, + 154, + 144, + 110, + 55, + 36, + 16, + 7, + 3, + 3 + ], + "bin_edges": [ + -0.21546487510204315, + -0.19615697860717773, + -0.1768490970134735, + -0.1575412005186081, + -0.13823330402374268, + -0.11892542243003845, + -0.09961752593517303, + -0.08030964434146881, + -0.061001747846603394, + -0.041693851351737976, + -0.022385969758033752, + -0.003078073263168335, + 0.016229823231697083, + 0.035537704825401306, + 0.05484558641910553, + 0.07415349781513214, + 0.09346137940883636, + 0.11276926100254059, + 0.1320771723985672, + 0.15138505399227142, + 0.17069295048713684 + ] + } + }, + "transformer.layers.16.4.ff.2.bias": { + "min": -0.5183588862419128, + "max": 0.49274152517318726, + "mean": 0.0023598431143909693, + "std": 0.053401440382003784, + "abs_mean": 0.03643597662448883, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.7096800804138184, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 10, + 59, + 279, + 449, + 173, + 18, + 5, + 1, + 0, + 1, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.5183588862419128, + -0.46780386567115784, + -0.41724884510040283, + -0.3666938245296478, + -0.3161388039588928, + -0.2655837833881378, + -0.2150287628173828, + -0.1644737422466278, + -0.1139187216758728, + -0.0633637011051178, + -0.012808680534362793, + 0.0377463698387146, + 0.08830136060714722, + 0.13885635137557983, + 0.18941140174865723, + 0.23996645212173462, + 0.29052144289016724, + 0.34107643365859985, + 0.39163148403167725, + 0.44218653440475464, + 0.49274152517318726 + ] + } + }, + "transformer.layers.17.0.weight": { + "min": -0.27355626225471497, + "max": 0.31514689326286316, + "mean": 1.8169534996559378e-06, + "std": 0.020052826032042503, + "abs_mean": 0.015906326472759247, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 29.038618087768555, + "elements": 2097152, + "histogram": { + "counts": [ + 3, + 3, + 6, + 10, + 22, + 41, + 60, + 90, + 119, + 102, + 139, + 134, + 96, + 71, + 48, + 34, + 18, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.06576590985059738, + -0.059407200664281845, + -0.05304849147796631, + -0.046689778566360474, + -0.04033106938004494, + -0.0339723601937294, + -0.027613647282123566, + -0.02125493809580803, + -0.014896228909492493, + -0.008537519723176956, + -0.0021788105368614197, + 0.004179902374744415, + 0.01053861528635025, + 0.01689732074737549, + 0.023256033658981323, + 0.02961473912000656, + 0.035973452031612396, + 0.04233216494321823, + 0.04869087040424347, + 0.055049583315849304, + 0.06140829250216484 + ] + } + }, + "transformer.layers.17.1.g": { + "min": 0.36634165048599243, + "max": 0.7102516293525696, + "mean": 0.5930806994438171, + "std": 0.04571138322353363, + "abs_mean": 0.5930806994438171, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 19.03481674194336, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 1, + 9, + 2, + 5, + 7, + 13, + 24, + 36, + 54, + 100, + 140, + 187, + 198, + 113, + 65, + 33, + 7, + 2 + ], + "bin_edges": [ + 0.36634165048599243, + 0.3835371434688568, + 0.4007326364517212, + 0.41792815923690796, + 0.43512365221977234, + 0.4523191452026367, + 0.4695146381855011, + 0.4867101311683655, + 0.5039056539535522, + 0.5211011171340942, + 0.538296639919281, + 0.5554921627044678, + 0.5726876258850098, + 0.5898831486701965, + 0.6070786118507385, + 0.6242741346359253, + 0.6414695978164673, + 0.658665120601654, + 0.6758606433868408, + 0.6930561065673828, + 0.7102516293525696 + ] + } + }, + "transformer.layers.17.2.to_q.weight": { + "min": -0.21087931096553802, + "max": 0.1994456797838211, + "mean": 3.07354457618203e-05, + "std": 0.034868594259023666, + "abs_mean": 0.027569569647312164, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.704959869384766, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 11, + 16, + 24, + 59, + 104, + 133, + 127, + 149, + 122, + 95, + 72, + 43, + 25, + 5, + 6, + 3, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.09528622031211853, + -0.08350884169340134, + -0.07173146307468414, + -0.05995407700538635, + -0.04817669838666916, + -0.036399319767951965, + -0.024621933698654175, + -0.012844555079936981, + -0.0010671764612197876, + 0.010710202157497406, + 0.0224875807762146, + 0.03426496684551239, + 0.04604235291481018, + 0.05781972408294678, + 0.06959711015224457, + 0.08137448132038116, + 0.09315186738967896, + 0.10492925345897675, + 0.11670662462711334, + 0.12848401069641113, + 0.14026139676570892 + ] + } + }, + "transformer.layers.17.2.to_q.bias": { + "min": -0.1869715005159378, + "max": 0.20369935035705566, + "mean": 0.0009553421987220645, + "std": 0.0314984992146492, + "abs_mean": 0.02109266072511673, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.0079233646392822, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 2, + 4, + 3, + 6, + 13, + 60, + 197, + 350, + 223, + 100, + 20, + 4, + 1, + 8, + 4, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.1869715005159378, + -0.16743795573711395, + -0.1479044258594513, + -0.12837088108062744, + -0.10883733630180359, + -0.08930379152297974, + -0.06977025419473648, + -0.050236716866493225, + -0.030703172087669373, + -0.01116962730884552, + 0.008363917469978333, + 0.02789744734764099, + 0.047430992126464844, + 0.0669645220041275, + 0.08649806678295135, + 0.10603161156177521, + 0.12556515634059906, + 0.1450987011194229, + 0.16463224589824677, + 0.18416579067707062, + 0.20369935035705566 + ] + } + }, + "transformer.layers.17.2.to_k.weight": { + "min": -0.28932973742485046, + "max": 0.33943668007850647, + "mean": -4.7415778681170195e-05, + "std": 0.034589733928442, + "abs_mean": 0.027366112917661667, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 35.41941833496094, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 6, + 4, + 13, + 34, + 43, + 71, + 96, + 119, + 119, + 107, + 138, + 90, + 68, + 42, + 22, + 16, + 8, + 2 + ], + "bin_edges": [ + -0.12474610656499863, + -0.11343011260032654, + -0.10211412608623505, + -0.09079813212156296, + -0.07948213815689087, + -0.06816615164279938, + -0.05685015767812729, + -0.0455341711640358, + -0.03421817719936371, + -0.02290218323469162, + -0.011586196720600128, + -0.00027020275592803955, + 0.011045791208744049, + 0.02236177772283554, + 0.03367776423692703, + 0.04499376565217972, + 0.05630975216627121, + 0.0676257386803627, + 0.07894174009561539, + 0.09025772660970688, + 0.10157372057437897 + ] + } + }, + "transformer.layers.17.2.to_k.bias": { + "min": -3.8712191581726074, + "max": 3.3820998668670654, + "mean": 0.014444351196289062, + "std": 0.8576834797859192, + "abs_mean": 0.6098200678825378, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 27.43636131286621, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 2, + 3, + 6, + 17, + 24, + 27, + 85, + 167, + 247, + 187, + 102, + 55, + 33, + 15, + 11, + 10, + 4, + 1 + ], + "bin_edges": [ + -3.8712191581726074, + -3.5085532665252686, + -3.1458873748779297, + -2.7832212448120117, + -2.420555353164673, + -2.057889461517334, + -1.695223331451416, + -1.3325574398040771, + -0.9698915481567383, + -0.6072256565093994, + -0.24455976486206055, + 0.11810636520385742, + 0.4807724952697754, + 0.8434381484985352, + 1.2061042785644531, + 1.568769931793213, + 1.9314360618591309, + 2.294102191925049, + 2.6567678451538086, + 3.0194339752197266, + 3.3820998668670654 + ] + } + }, + "transformer.layers.17.2.to_v.weight": { + "min": -0.2242382913827896, + "max": 0.24965918064117432, + "mean": -4.0143440855899826e-06, + "std": 0.04223589971661568, + "abs_mean": 0.033358603715896606, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 43.24907684326172, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 3, + 12, + 23, + 63, + 86, + 120, + 148, + 161, + 141, + 102, + 56, + 43, + 26, + 4, + 7, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.14771312475204468, + -0.13164333999156952, + -0.11557355523109436, + -0.0995037704706192, + -0.08343398571014404, + -0.06736420094966888, + -0.051294416189193726, + -0.03522463142871857, + -0.019154846668243408, + -0.0030850619077682495, + 0.01298472285270691, + 0.029054507613182068, + 0.04512429237365723, + 0.061194077134132385, + 0.07726386189460754, + 0.0933336466550827, + 0.10940343141555786, + 0.12547320127487183, + 0.14154300093650818, + 0.15761280059814453, + 0.1736825704574585 + ] + } + }, + "transformer.layers.17.2.to_v.bias": { + "min": -0.05498581379652023, + "max": 0.046769097447395325, + "mean": -1.842428173404187e-05, + "std": 0.015840334817767143, + "abs_mean": 0.012838434427976608, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.506643533706665, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 5, + 2, + 20, + 34, + 50, + 76, + 100, + 112, + 135, + 95, + 98, + 115, + 76, + 41, + 20, + 11, + 4, + 4 + ], + "bin_edges": [ + -0.05498581379652023, + -0.049898069351911545, + -0.044810324907302856, + -0.03972257673740387, + -0.03463483229279518, + -0.029547087848186493, + -0.024459341540932655, + -0.019371595233678818, + -0.01428385078907013, + -0.009196106344461441, + -0.004108361899852753, + 0.0009793862700462341, + 0.0060671307146549225, + 0.01115487515926361, + 0.016242623329162598, + 0.021330364048480988, + 0.026418112218379974, + 0.03150586038827896, + 0.03659360110759735, + 0.04168134927749634, + 0.046769097447395325 + ] + } + }, + "transformer.layers.17.2.to_out.0.weight": { + "min": -0.2928566634654999, + "max": 0.29091376066207886, + "mean": -7.36157790015568e-06, + "std": 0.04195090010762215, + "abs_mean": 0.03324268013238907, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 42.957271575927734, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 11, + 14, + 20, + 39, + 64, + 81, + 115, + 134, + 137, + 123, + 87, + 76, + 46, + 25, + 10, + 12, + 2, + 1, + 1 + ], + "bin_edges": [ + -0.12461046129465103, + -0.11084295809268951, + -0.0970754474401474, + -0.08330794423818588, + -0.06954044103622437, + -0.05577293783426285, + -0.042005427181720734, + -0.028237923979759216, + -0.014470420777797699, + -0.0007029175758361816, + 0.013064585626125336, + 0.02683209627866745, + 0.040599606931209564, + 0.054367102682590485, + 0.0681346133351326, + 0.08190210908651352, + 0.09566961973905563, + 0.10943713039159775, + 0.12320462614297867, + 0.13697212934494019, + 0.1507396250963211 + ] + } + }, + "transformer.layers.17.2.to_out.0.bias": { + "min": -0.12467863410711288, + "max": 0.25901108980178833, + "mean": -0.003233879804611206, + "std": 0.05313729867339134, + "abs_mean": 0.04382557421922684, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.7027106285095215, + "elements": 1024, + "histogram": { + "counts": [ + 17, + 40, + 63, + 108, + 104, + 133, + 129, + 110, + 108, + 81, + 65, + 31, + 9, + 1, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.12467863410711288, + -0.10549414902925491, + -0.08630965650081635, + -0.06712517142295837, + -0.0479406863451004, + -0.02875620126724243, + -0.009571708738803864, + 0.009612776339054108, + 0.02879726141691208, + 0.04798174649477005, + 0.06716623157262802, + 0.08635071665048599, + 0.10553521662950516, + 0.12471970170736313, + 0.1439041793346405, + 0.16308864951133728, + 0.18227314949035645, + 0.2014576494693756, + 0.2206421196460724, + 0.23982661962509155, + 0.25901108980178833 + ] + } + }, + "transformer.layers.17.3.g": { + "min": 0.4561373293399811, + "max": 0.8428487777709961, + "mean": 0.7054461240768433, + "std": 0.03489769622683525, + "abs_mean": 0.7054461240768433, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.601856231689453, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 2, + 4, + 5, + 10, + 20, + 70, + 158, + 257, + 238, + 148, + 46, + 28, + 7, + 3, + 3 + ], + "bin_edges": [ + 0.4561373293399811, + 0.47547289729118347, + 0.49480846524238586, + 0.5141440629959106, + 0.533479630947113, + 0.5528151988983154, + 0.5721507668495178, + 0.5914863348007202, + 0.6108219027519226, + 0.630157470703125, + 0.6494930386543274, + 0.6688286066055298, + 0.6881641745567322, + 0.7074997425079346, + 0.7268353700637817, + 0.7461708784103394, + 0.7655065059661865, + 0.7848420143127441, + 0.8041776418685913, + 0.8235131502151489, + 0.8428487777709961 + ] + } + }, + "transformer.layers.17.4.ff.0.0.weight": { + "min": -0.5113534331321716, + "max": 0.3484715223312378, + "mean": 0.0003426253970246762, + "std": 0.04020649194717407, + "abs_mean": 0.03195585310459137, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 82.33788299560547, + "elements": 4194304, + "histogram": { + "counts": [ + 3, + 3, + 9, + 20, + 28, + 40, + 60, + 80, + 105, + 102, + 138, + 107, + 90, + 75, + 58, + 36, + 21, + 11, + 9, + 5 + ], + "bin_edges": [ + -0.12453698366880417, + -0.1125292107462883, + -0.10052144527435303, + -0.08851367235183716, + -0.07650589942932129, + -0.06449813395738602, + -0.05249036103487015, + -0.040482595562934875, + -0.028474822640419006, + -0.016467049717903137, + -0.004459284245967865, + 0.007548488676548004, + 0.019556261599063873, + 0.03156403452157974, + 0.04357179254293442, + 0.05557956546545029, + 0.06758733838796616, + 0.07959511131048203, + 0.0916028842329979, + 0.10361064225435257, + 0.11561842262744904 + ] + } + }, + "transformer.layers.17.4.ff.0.0.bias": { + "min": -0.18678922951221466, + "max": 0.03952203318476677, + "mean": -0.03937358409166336, + "std": 0.02131999284029007, + "abs_mean": 0.03990429267287254, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.865535259246826, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 1, + 0, + 4, + 6, + 10, + 16, + 48, + 86, + 95, + 135, + 144, + 145, + 119, + 95, + 45, + 27, + 11, + 6, + 6 + ], + "bin_edges": [ + -0.1281859427690506, + -0.12049932032823563, + -0.11281269043684006, + -0.10512606799602509, + -0.09743943810462952, + -0.08975281566381454, + -0.08206619322299957, + -0.074379563331604, + -0.06669294089078903, + -0.05900631844997406, + -0.05131968855857849, + -0.04363306611776352, + -0.03594644367694855, + -0.02825981378555298, + -0.020573191344738007, + -0.012886561453342438, + -0.005199939012527466, + 0.002486690878868103, + 0.010173305869102478, + 0.017859935760498047, + 0.025546569377183914 + ] + } + }, + "transformer.layers.17.4.ff.2.weight": { + "min": -0.5436691045761108, + "max": 0.5556817054748535, + "mean": -7.17876828275621e-05, + "std": 0.05074293538928032, + "abs_mean": 0.039733123034238815, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 103.91061401367188, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 2, + 10, + 32, + 45, + 93, + 135, + 137, + 167, + 151, + 103, + 54, + 35, + 20, + 10, + 1, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.16656459867954254, + -0.14604578912258148, + -0.12552699446678162, + -0.10500818490982056, + -0.0844893753528595, + -0.06397056579589844, + -0.043451763689517975, + -0.022932961583137512, + -0.0024141520261764526, + 0.018104657530784607, + 0.038623467087745667, + 0.05914226174354553, + 0.07966107130050659, + 0.10017986595630646, + 0.12069867551326752, + 0.14121748507022858, + 0.16173629462718964, + 0.1822551041841507, + 0.20277391374111176, + 0.22329272329807281, + 0.24381153285503387 + ] + } + }, + "transformer.layers.17.4.ff.2.bias": { + "min": -0.5110356211662292, + "max": 0.6633175015449524, + "mean": 0.002444919664412737, + "std": 0.04948664829134941, + "abs_mean": 0.03394554927945137, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.5847316980361938, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 2, + 8, + 128, + 497, + 325, + 36, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.5110356211662292, + -0.4523179531097412, + -0.39360031485557556, + -0.3348826467990875, + -0.2761650085449219, + -0.21744734048843384, + -0.1587296724319458, + -0.10001203417778015, + -0.041294366121292114, + 0.017423272132873535, + 0.07614094018936157, + 0.1348586082458496, + 0.19357627630233765, + 0.2522939443588257, + 0.31101155281066895, + 0.369729220867157, + 0.428446888923645, + 0.48716455698013306, + 0.5458821654319763, + 0.6045998930931091, + 0.6633175015449524 + ] + } + }, + "transformer.layers.18.0.weight": { + "min": -0.3323739171028137, + "max": 0.2654549777507782, + "mean": 3.673961600725306e-06, + "std": 0.019390413537621498, + "abs_mean": 0.01544923335313797, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 28.079303741455078, + "elements": 2097152, + "histogram": { + "counts": [ + 1, + 4, + 6, + 14, + 21, + 27, + 60, + 82, + 114, + 130, + 140, + 111, + 109, + 77, + 53, + 28, + 11, + 8, + 3, + 1 + ], + "bin_edges": [ + -0.06729830801486969, + -0.06076580286026001, + -0.05423329770565033, + -0.04770079255104065, + -0.04116828739643097, + -0.03463578224182129, + -0.02810327708721161, + -0.02157077193260193, + -0.015038266777992249, + -0.008505761623382568, + -0.001973256468772888, + 0.004559248685836792, + 0.011091753840446472, + 0.017624258995056152, + 0.024156764149665833, + 0.030689269304275513, + 0.03722177445888519, + 0.04375427961349487, + 0.05028678476810455, + 0.05681928992271423, + 0.06335178762674332 + ] + } + }, + "transformer.layers.18.1.g": { + "min": 0.32227811217308044, + "max": 0.7648001313209534, + "mean": 0.6509190201759338, + "std": 0.04508262872695923, + "abs_mean": 0.6509190201759338, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.87925910949707, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 0, + 1, + 1, + 3, + 3, + 6, + 10, + 17, + 27, + 70, + 126, + 202, + 219, + 215, + 82, + 14, + 2 + ], + "bin_edges": [ + 0.32227811217308044, + 0.3444042205810547, + 0.36653029918670654, + 0.3886564075946808, + 0.41078251600265503, + 0.4329086244106293, + 0.4550347328186035, + 0.47716081142425537, + 0.4992869198322296, + 0.5214130282402039, + 0.5435391068458557, + 0.5656652450561523, + 0.5877913236618042, + 0.609917402267456, + 0.6320434808731079, + 0.6541696190834045, + 0.6762957572937012, + 0.698421835899353, + 0.7205479145050049, + 0.7426739931106567, + 0.7648001313209534 + ] + } + }, + "transformer.layers.18.2.to_q.weight": { + "min": -0.24930793046951294, + "max": 0.21936655044555664, + "mean": -2.44708098762203e-06, + "std": 0.036502547562122345, + "abs_mean": 0.028754178434610367, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 37.37805938720703, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 4, + 9, + 23, + 31, + 56, + 68, + 127, + 116, + 139, + 124, + 103, + 80, + 46, + 36, + 24, + 5, + 5, + 1, + 1 + ], + "bin_edges": [ + -0.11012959480285645, + -0.09836138784885406, + -0.08659318089485168, + -0.0748249739408493, + -0.06305676698684692, + -0.05128856375813484, + -0.03952036052942276, + -0.02775215357542038, + -0.015983946621418, + -0.004215739667415619, + 0.0075524672865867615, + 0.019320666790008545, + 0.031088873744010925, + 0.042857080698013306, + 0.054625287652015686, + 0.06639349460601807, + 0.07816170156002045, + 0.08992990851402283, + 0.10169811546802521, + 0.11346632242202759, + 0.12523452937602997 + ] + } + }, + "transformer.layers.18.2.to_q.bias": { + "min": -0.32666686177253723, + "max": 0.2868551015853882, + "mean": -0.0006774846115149558, + "std": 0.03851696848869324, + "abs_mean": 0.023683838546276093, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.232131838798523, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 2, + 0, + 4, + 7, + 7, + 18, + 160, + 475, + 262, + 41, + 7, + 6, + 5, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.32666686177253723, + -0.2959907650947571, + -0.26531466841697693, + -0.23463857173919678, + -0.20396247506141663, + -0.17328637838363647, + -0.14261028170585632, + -0.11193418502807617, + -0.08125808835029602, + -0.05058199167251587, + -0.019905894994735718, + 0.010770201683044434, + 0.041446298360824585, + 0.07212239503860474, + 0.10279849171638489, + 0.13347458839416504, + 0.1641506850719452, + 0.19482681155204773, + 0.2255028784275055, + 0.25617894530296326, + 0.2868551015853882 + ] + } + }, + "transformer.layers.18.2.to_k.weight": { + "min": -0.3097042739391327, + "max": 0.3694048821926117, + "mean": 6.485832273028791e-05, + "std": 0.03624315932393074, + "abs_mean": 0.02854427509009838, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 37.11252975463867, + "elements": 1048576, + "histogram": { + "counts": [ + 6, + 11, + 17, + 28, + 44, + 63, + 78, + 106, + 128, + 142, + 131, + 94, + 46, + 41, + 23, + 20, + 10, + 5, + 3, + 4 + ], + "bin_edges": [ + -0.10426375269889832, + -0.09263941645622253, + -0.08101507276296616, + -0.06939072906970978, + -0.057766392827034, + -0.04614205285906792, + -0.03451771289110184, + -0.02289336919784546, + -0.011269032955169678, + 0.0003553032875061035, + 0.011979646980762482, + 0.02360399067401886, + 0.03522832691669464, + 0.04685266315937042, + 0.0584770143032074, + 0.07010135054588318, + 0.08172568678855896, + 0.09335002303123474, + 0.10497435927391052, + 0.1165987104177475, + 0.12822304666042328 + ] + } + }, + "transformer.layers.18.2.to_k.bias": { + "min": -4.71013069152832, + "max": 5.798623085021973, + "mean": 0.03792855516076088, + "std": 1.41161048412323, + "abs_mean": 0.9221487641334534, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 45.16578674316406, + "elements": 1024, + "histogram": { + "counts": [ + 8, + 12, + 12, + 18, + 18, + 20, + 46, + 103, + 266, + 244, + 109, + 60, + 19, + 22, + 13, + 5, + 12, + 7, + 2, + 4 + ], + "bin_edges": [ + -4.71013069152832, + -4.184692859649658, + -3.659255266189575, + -3.133817672729492, + -2.60837984085083, + -2.082942008972168, + -1.557504415512085, + -1.032066822052002, + -0.5066289901733398, + 0.018808841705322266, + 0.5442466735839844, + 1.0696840286254883, + 1.5951218605041504, + 2.1205596923828125, + 2.6459970474243164, + 3.1714348793029785, + 3.6968727111816406, + 4.2223100662231445, + 4.747748374938965, + 5.273185729980469, + 5.798623085021973 + ] + } + }, + "transformer.layers.18.2.to_v.weight": { + "min": -0.22137394547462463, + "max": 0.20554855465888977, + "mean": -7.500727951992303e-05, + "std": 0.042491503059864044, + "abs_mean": 0.033712420612573624, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 43.510765075683594, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 0, + 4, + 10, + 11, + 26, + 40, + 58, + 95, + 146, + 112, + 117, + 122, + 102, + 72, + 51, + 20, + 6, + 5, + 2 + ], + "bin_edges": [ + -0.1555749922990799, + -0.14149212837219238, + -0.12740924954414368, + -0.11332638561725616, + -0.09924351423978806, + -0.08516064286231995, + -0.07107777893543243, + -0.056994907557964325, + -0.042912036180496216, + -0.028829172253608704, + -0.014746293425559998, + -0.0006634294986724854, + 0.013419434428215027, + 0.027502313256263733, + 0.041585177183151245, + 0.05566805601119995, + 0.06975091993808746, + 0.08383378386497498, + 0.09791664779186249, + 0.1119995266199112, + 0.1260823905467987 + ] + } + }, + "transformer.layers.18.2.to_v.bias": { + "min": -0.07746972888708115, + "max": 0.05126894265413284, + "mean": -0.0009250898147001863, + "std": 0.016401393339037895, + "abs_mean": 0.013242571614682674, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5254228711128235, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 2, + 1, + 4, + 15, + 32, + 72, + 100, + 146, + 136, + 146, + 133, + 107, + 50, + 39, + 11, + 2, + 3 + ], + "bin_edges": [ + -0.07746972888708115, + -0.07103279232978821, + -0.06459586322307587, + -0.05815892666578293, + -0.05172199383378029, + -0.04528506100177765, + -0.03884812444448471, + -0.03241119161248207, + -0.02597425878047943, + -0.01953732594847679, + -0.013100393116474152, + -0.006663456559181213, + -0.00022652000188827515, + 0.006210409104824066, + 0.012647345662117004, + 0.019084274768829346, + 0.025521211326122284, + 0.03195814788341522, + 0.038395076990127563, + 0.0448320135474205, + 0.05126894265413284 + ] + } + }, + "transformer.layers.18.2.to_out.0.weight": { + "min": -0.33084556460380554, + "max": 0.32904890179634094, + "mean": -4.916631951346062e-06, + "std": 0.042798250913619995, + "abs_mean": 0.03404157981276512, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 43.824947357177734, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 11, + 11, + 40, + 47, + 57, + 103, + 107, + 142, + 101, + 112, + 98, + 80, + 43, + 19, + 12, + 7, + 1, + 2 + ], + "bin_edges": [ + -0.1374271959066391, + -0.12343340367078781, + -0.10943961143493652, + -0.09544582664966583, + -0.08145203441381454, + -0.06745824217796326, + -0.053464457392692566, + -0.03947066515684128, + -0.02547687292098999, + -0.0114830881357193, + 0.0025107115507125854, + 0.016504496335983276, + 0.030498281121253967, + 0.04449208080768585, + 0.05848586559295654, + 0.07247966527938843, + 0.08647345006465912, + 0.10046723484992981, + 0.1144610196352005, + 0.12845481932163239, + 0.14244860410690308 + ] + } + }, + "transformer.layers.18.2.to_out.0.bias": { + "min": -0.2845572233200073, + "max": 0.11143017560243607, + "mean": -0.0012043914757668972, + "std": 0.04699280112981796, + "abs_mean": 0.03843585401773453, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.5035291910171509, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 4, + 25, + 53, + 86, + 139, + 138, + 157, + 137, + 117, + 84, + 45, + 14 + ], + "bin_edges": [ + -0.2845572233200073, + -0.26475784182548523, + -0.24495849013328552, + -0.22515910863876343, + -0.20535974204540253, + -0.18556037545204163, + -0.16576099395751953, + -0.14596162736415863, + -0.12616226077079773, + -0.10636289417743683, + -0.08656352758407593, + -0.06676414608955383, + -0.04696477949619293, + -0.02716541290283203, + -0.0073660314083099365, + 0.01243332028388977, + 0.032232701778411865, + 0.05203208327293396, + 0.07183143496513367, + 0.09163081645965576, + 0.11143017560243607 + ] + } + }, + "transformer.layers.18.3.g": { + "min": 0.48666608333587646, + "max": 0.885034441947937, + "mean": 0.7373895049095154, + "std": 0.03794779255986214, + "abs_mean": 0.7373895049095154, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 23.62765884399414, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 2, + 2, + 2, + 1, + 2, + 3, + 6, + 28, + 83, + 195, + 270, + 216, + 119, + 41, + 16, + 4, + 3, + 4 + ], + "bin_edges": [ + 0.48666608333587646, + 0.5065845251083374, + 0.5265029072761536, + 0.5464213490486145, + 0.5663397312164307, + 0.5862581729888916, + 0.6061766147613525, + 0.6260949969291687, + 0.6460134387016296, + 0.6659318208694458, + 0.6858502626419067, + 0.7057687044143677, + 0.7256870865821838, + 0.74560546875, + 0.7655239105224609, + 0.7854423522949219, + 0.8053607940673828, + 0.825279176235199, + 0.8451975584030151, + 0.8651160001754761, + 0.885034441947937 + ] + } + }, + "transformer.layers.18.4.ff.0.0.weight": { + "min": -0.3611343502998352, + "max": 0.27392831444740295, + "mean": 5.1206770876888186e-05, + "std": 0.04065323248505592, + "abs_mean": 0.032221995294094086, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.24979400634766, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 2, + 11, + 22, + 24, + 53, + 90, + 105, + 121, + 146, + 116, + 111, + 73, + 58, + 36, + 18, + 10, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.13471700251102448, + -0.1202903464436531, + -0.10586368292570114, + -0.09143702685832977, + -0.07701036334037781, + -0.06258370727300644, + -0.04815705120563507, + -0.033730387687683105, + -0.019303731620311737, + -0.004877075552940369, + 0.009549587965011597, + 0.023976251482963562, + 0.038402900099754333, + 0.0528295636177063, + 0.06725622713565826, + 0.08168287575244904, + 0.096109539270401, + 0.11053620278835297, + 0.12496285140514374, + 0.1393895298242569, + 0.15381617844104767 + ] + } + }, + "transformer.layers.18.4.ff.0.0.bias": { + "min": -0.2472306787967682, + "max": 0.046531591564416885, + "mean": -0.03925502672791481, + "std": 0.023223698139190674, + "abs_mean": 0.039705656468868256, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.9189653396606445, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 1, + 0, + 9, + 22, + 49, + 155, + 251, + 220, + 190, + 71, + 20, + 8 + ], + "bin_edges": [ + -0.2472306787967682, + -0.2334253042936325, + -0.21961992979049683, + -0.20581455528736115, + -0.19200918078422546, + -0.17820380628108978, + -0.1643984317779541, + -0.15059305727481842, + -0.13678768277168274, + -0.12298230826854706, + -0.10917693376541138, + -0.0953715592622757, + -0.08156618475914001, + -0.06776081025600433, + -0.05395543575286865, + -0.04015006124973297, + -0.02634468674659729, + -0.012539312243461609, + 0.0012660622596740723, + 0.01507142186164856, + 0.02887682616710663 + ] + } + }, + "transformer.layers.18.4.ff.2.weight": { + "min": -0.62546706199646, + "max": 0.596234142780304, + "mean": -6.186795508256182e-05, + "std": 0.0531260222196579, + "abs_mean": 0.04139447957277298, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 108.79020690917969, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 1, + 3, + 10, + 29, + 69, + 135, + 217, + 218, + 178, + 77, + 42, + 12, + 5, + 1, + 0, + 1, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.22725528478622437, + -0.1996065378189087, + -0.17195777595043182, + -0.14430902898311615, + -0.11666027456521988, + -0.08901152014732361, + -0.061362773180007935, + -0.03371401131153107, + -0.006065264344215393, + 0.02158348262310028, + 0.04923224449157715, + 0.07688099145889282, + 0.1045297384262085, + 0.13217848539352417, + 0.15982726216316223, + 0.1874760091304779, + 0.21512475609779358, + 0.24277350306510925, + 0.2704222500324249, + 0.298071026802063, + 0.32571980357170105 + ] + } + }, + "transformer.layers.18.4.ff.2.bias": { + "min": -0.7086492776870728, + "max": 0.2654070556163788, + "mean": 0.0009191531571559608, + "std": 0.05119417607784271, + "abs_mean": 0.035680606961250305, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.6376776695251465, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 5, + 40, + 215, + 441, + 242, + 42, + 10, + 1, + 2 + ], + "bin_edges": [ + -0.7086492776870728, + -0.6599464416503906, + -0.6112436652183533, + -0.5625408291816711, + -0.513837993144989, + -0.4651351869106293, + -0.41643238067626953, + -0.3677295446395874, + -0.31902673840522766, + -0.2703239321708679, + -0.2216210961341858, + -0.17291826009750366, + -0.12421548366546631, + -0.07551264762878418, + -0.02680981159210205, + 0.021892964839935303, + 0.07059580087661743, + 0.11929863691329956, + 0.16800141334533691, + 0.21670424938201904, + 0.2654070556163788 + ] + } + }, + "transformer.layers.19.0.weight": { + "min": -0.34331265091896057, + "max": 0.30340248346328735, + "mean": 2.3374013835564256e-07, + "std": 0.019139692187309265, + "abs_mean": 0.015232603996992111, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 27.716217041015625, + "elements": 2097152, + "histogram": { + "counts": [ + 7, + 10, + 21, + 25, + 45, + 68, + 64, + 107, + 120, + 115, + 105, + 99, + 69, + 56, + 38, + 30, + 14, + 4, + 1, + 2 + ], + "bin_edges": [ + -0.05381210148334503, + -0.04800749570131302, + -0.04220288619399071, + -0.036398276686668396, + -0.030593670904636383, + -0.02478906325995922, + -0.01898445561528206, + -0.013179846107959747, + -0.007375240325927734, + -0.0015706345438957214, + 0.00423397496342659, + 0.010038584470748901, + 0.015843190252780914, + 0.021647796034812927, + 0.027452409267425537, + 0.03325701504945755, + 0.03906162083148956, + 0.044866226613521576, + 0.05067083239555359, + 0.0564754456281662, + 0.06228005141019821 + ] + } + }, + "transformer.layers.19.1.g": { + "min": 0.3500247001647949, + "max": 0.7813002467155457, + "mean": 0.6387312412261963, + "std": 0.048984214663505554, + "abs_mean": 0.6387312412261963, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.499359130859375, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 0, + 0, + 1, + 6, + 12, + 14, + 10, + 24, + 53, + 83, + 164, + 193, + 193, + 160, + 67, + 13, + 4, + 1 + ], + "bin_edges": [ + 0.3500247001647949, + 0.37158846855163574, + 0.39315226674079895, + 0.41471603512763977, + 0.4362798035144806, + 0.4578436017036438, + 0.4794073700904846, + 0.5009711384773254, + 0.5225349068641663, + 0.5440987348556519, + 0.5656625032424927, + 0.5872262716293335, + 0.6087900400161743, + 0.6303538084030151, + 0.651917576789856, + 0.6734813451766968, + 0.6950451135635376, + 0.7166088819503784, + 0.738172709941864, + 0.7597364783287048, + 0.7813002467155457 + ] + } + }, + "transformer.layers.19.2.to_q.weight": { + "min": -0.20559599995613098, + "max": 0.20657846331596375, + "mean": -5.995870742481202e-05, + "std": 0.03769858554005623, + "abs_mean": 0.02982865273952484, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.60289001464844, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 3, + 4, + 4, + 18, + 40, + 60, + 94, + 114, + 145, + 151, + 133, + 99, + 67, + 33, + 18, + 10, + 4, + 1, + 1 + ], + "bin_edges": [ + -0.13801053166389465, + -0.12424114346504211, + -0.11047175526618958, + -0.09670236706733704, + -0.0829329788684845, + -0.06916359066963196, + -0.05539420247077942, + -0.04162481427192688, + -0.02785542607307434, + -0.014086037874221802, + -0.0003166496753692627, + 0.013452738523483276, + 0.027222126722335815, + 0.040991514921188354, + 0.054760903120040894, + 0.06853029131889343, + 0.08229967951774597, + 0.09606906771659851, + 0.10983845591545105, + 0.12360784411430359, + 0.13737723231315613 + ] + } + }, + "transformer.layers.19.2.to_q.bias": { + "min": -0.25827330350875854, + "max": 0.26797717809677124, + "mean": -0.00040583324152976274, + "std": 0.04458905756473541, + "abs_mean": 0.030639849603176117, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.426212191581726, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 3, + 1, + 1, + 2, + 9, + 17, + 53, + 173, + 308, + 262, + 110, + 34, + 5, + 10, + 3, + 4, + 3, + 0, + 1 + ], + "bin_edges": [ + -0.25827330350875854, + -0.23196077346801758, + -0.2056482583284378, + -0.17933572828769684, + -0.15302321314811707, + -0.1267106831073761, + -0.10039815306663513, + -0.07408563792705536, + -0.04777310788631439, + -0.021460577845573425, + 0.004851937294006348, + 0.031164467334747314, + 0.05747699737548828, + 0.08378952741622925, + 0.11010202765464783, + 0.1364145576953888, + 0.16272708773612976, + 0.18903961777687073, + 0.2153521478176117, + 0.24166464805603027, + 0.26797717809677124 + ] + } + }, + "transformer.layers.19.2.to_k.weight": { + "min": -0.35375165939331055, + "max": 0.32213273644447327, + "mean": -7.335219379456248e-06, + "std": 0.03720685839653015, + "abs_mean": 0.029421523213386536, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 38.09929656982422, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 5, + 11, + 25, + 36, + 54, + 93, + 104, + 120, + 141, + 134, + 95, + 66, + 43, + 30, + 16, + 13, + 5, + 4 + ], + "bin_edges": [ + -0.1217818558216095, + -0.11000676453113556, + -0.09823167324066162, + -0.08645657449960709, + -0.07468148320913315, + -0.06290639191865921, + -0.051131293177604675, + -0.03935620188713074, + -0.0275811105966568, + -0.01580601930618286, + -0.004030928015708923, + 0.007744163274765015, + 0.019519269466400146, + 0.031294360756874084, + 0.04306945204734802, + 0.05484454333782196, + 0.0666196346282959, + 0.07839472591876984, + 0.09016981720924377, + 0.10194490849971771, + 0.11371999979019165 + ] + } + }, + "transformer.layers.19.2.to_k.bias": { + "min": -5.253459930419922, + "max": 4.198183536529541, + "mean": -0.0263908039778471, + "std": 1.0056793689727783, + "abs_mean": 0.6567726135253906, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.17710876464844, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 3, + 2, + 9, + 10, + 13, + 32, + 36, + 79, + 265, + 316, + 129, + 31, + 27, + 19, + 14, + 10, + 2, + 1 + ], + "bin_edges": [ + -5.253459930419922, + -4.780877590179443, + -4.308295726776123, + -3.8357133865356445, + -3.363131284713745, + -2.8905491828918457, + -2.417966842651367, + -1.9453847408294678, + -1.4728026390075684, + -1.0002202987670898, + -0.5276384353637695, + -0.055056095123291016, + 0.4175262451171875, + 0.8901081085205078, + 1.3626904487609863, + 1.8352723121643066, + 2.307854652404785, + 2.7804365158081055, + 3.253019332885742, + 3.7256011962890625, + 4.198183536529541 + ] + } + }, + "transformer.layers.19.2.to_v.weight": { + "min": -0.23853513598442078, + "max": 0.24350698292255402, + "mean": -2.5575776817277074e-05, + "std": 0.04321583732962608, + "abs_mean": 0.03416847437620163, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 44.25249481201172, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 4, + 9, + 18, + 30, + 48, + 82, + 91, + 120, + 131, + 129, + 115, + 84, + 62, + 41, + 13, + 13, + 5, + 1, + 3 + ], + "bin_edges": [ + -0.1413203775882721, + -0.1268763244152069, + -0.11243227869272232, + -0.09798823297023773, + -0.08354417979717255, + -0.06910013407468796, + -0.05465608835220337, + -0.040212035179138184, + -0.025767989456653595, + -0.011323943734169006, + 0.003120109438896179, + 0.017564162611961365, + 0.032008200883865356, + 0.04645225405693054, + 0.06089630722999573, + 0.07534034550189972, + 0.0897843986749649, + 0.10422845184803009, + 0.11867249011993408, + 0.13311654329299927, + 0.14756059646606445 + ] + } + }, + "transformer.layers.19.2.to_v.bias": { + "min": -0.06232254579663277, + "max": 0.05653427913784981, + "mean": 0.0003516775614116341, + "std": 0.014141896739602089, + "abs_mean": 0.011390534229576588, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4524596631526947, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 3, + 1, + 6, + 13, + 35, + 90, + 109, + 148, + 154, + 146, + 147, + 87, + 38, + 15, + 4, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.06232254579663277, + -0.05637970566749573, + -0.05043686181306839, + -0.04449402168393135, + -0.03855118155479431, + -0.03260834142565727, + -0.026665497571229935, + -0.020722657442092896, + -0.014779817312955856, + -0.008836977183818817, + -0.002894137054681778, + 0.0030487067997455597, + 0.008991550654172897, + 0.014934387058019638, + 0.020877230912446976, + 0.026820067316293716, + 0.032762911170721054, + 0.03870575502514839, + 0.04464859142899513, + 0.05059143528342247, + 0.05653427913784981 + ] + } + }, + "transformer.layers.19.2.to_out.0.weight": { + "min": -0.437425822019577, + "max": 0.3736904561519623, + "mean": 1.4616346561524551e-05, + "std": 0.044127896428108215, + "abs_mean": 0.03491860628128052, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 45.18648910522461, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 3, + 21, + 23, + 48, + 62, + 87, + 109, + 114, + 128, + 127, + 111, + 53, + 50, + 24, + 14, + 13, + 3, + 3 + ], + "bin_edges": [ + -0.1402941793203354, + -0.12645918130874634, + -0.1126241683959961, + -0.09878916293382645, + -0.0849541574716568, + -0.07111915200948715, + -0.057284146547317505, + -0.04344914108514786, + -0.02961413562297821, + -0.015779130160808563, + -0.001944124698638916, + 0.011890873312950134, + 0.02572588622570038, + 0.03956089913845062, + 0.05339589715003967, + 0.06723089516162872, + 0.08106590807437897, + 0.09490092098712921, + 0.10873591899871826, + 0.12257091701030731, + 0.13640592992305756 + ] + } + }, + "transformer.layers.19.2.to_out.0.bias": { + "min": -0.09596914798021317, + "max": 0.17601557075977325, + "mean": -0.0006586366798728704, + "std": 0.03512872755527496, + "abs_mean": 0.028804786503314972, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.1237679719924927, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 15, + 38, + 88, + 99, + 107, + 136, + 149, + 118, + 125, + 59, + 46, + 12, + 3, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.09596914798021317, + -0.08236990869045258, + -0.06877067685127258, + -0.05517143756151199, + -0.0415722019970417, + -0.02797296643257141, + -0.014373727142810822, + -0.0007744953036308289, + 0.01282474398612976, + 0.02642398327589035, + 0.04002321511507034, + 0.053622446954250336, + 0.06722169369459152, + 0.08082092553377151, + 0.09442015737295151, + 0.1080194041132927, + 0.12161863595247269, + 0.13521787524223328, + 0.14881712198257446, + 0.16241633892059326, + 0.17601557075977325 + ] + } + }, + "transformer.layers.19.3.g": { + "min": 0.42178472876548767, + "max": 1.06712007522583, + "mean": 0.7484290599822998, + "std": 0.04182668402791023, + "abs_mean": 0.7484290599822998, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 23.987064361572266, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 4, + 1, + 3, + 3, + 4, + 22, + 73, + 291, + 408, + 157, + 24, + 6, + 0, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + 0.42178472876548767, + 0.45405149459838867, + 0.4863182604312897, + 0.5185850262641907, + 0.5508518218994141, + 0.5831185579299927, + 0.6153852939605713, + 0.6476520895957947, + 0.6799188852310181, + 0.7121856212615967, + 0.7444523572921753, + 0.7767191529273987, + 0.8089859485626221, + 0.8412526845932007, + 0.8735194206237793, + 0.9057862162590027, + 0.9380530118942261, + 0.9703197479248047, + 1.0025864839553833, + 1.0348533391952515, + 1.06712007522583 + ] + } + }, + "transformer.layers.19.4.ff.0.0.weight": { + "min": -0.26583534479141235, + "max": 0.29665902256965637, + "mean": -7.891673885751516e-05, + "std": 0.04081389307975769, + "abs_mean": 0.032326411455869675, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.57918548583984, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 4, + 8, + 13, + 21, + 53, + 78, + 130, + 144, + 145, + 138, + 111, + 72, + 41, + 20, + 15, + 4, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.1381877362728119, + -0.12331786006689072, + -0.10844798386096954, + -0.09357810765504837, + -0.0787082314491272, + -0.06383835524320602, + -0.04896847903728485, + -0.03409860283136368, + -0.019228726625442505, + -0.004358857870101929, + 0.010511025786399841, + 0.02538090944290161, + 0.04025077819824219, + 0.055120646953582764, + 0.06999053061008453, + 0.0848604142665863, + 0.09973028302192688, + 0.11460015177726746, + 0.12947002053260803, + 0.144339919090271, + 0.15920978784561157 + ] + } + }, + "transformer.layers.19.4.ff.0.0.bias": { + "min": -0.18455219268798828, + "max": 0.043140046298503876, + "mean": -0.03679502755403519, + "std": 0.0255513247102499, + "abs_mean": 0.037825148552656174, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.866874933242798, + "elements": 4096, + "histogram": { + "counts": [ + 3, + 1, + 4, + 4, + 4, + 6, + 25, + 28, + 45, + 79, + 129, + 176, + 166, + 140, + 108, + 45, + 25, + 7, + 3, + 2 + ], + "bin_edges": [ + -0.1534748673439026, + -0.1436709612607956, + -0.1338670551776886, + -0.12406314164400101, + -0.11425923556089401, + -0.10445532947778702, + -0.09465141594409943, + -0.08484750986099243, + -0.07504360377788544, + -0.06523969769477844, + -0.05543579161167145, + -0.045631878077983856, + -0.03582797199487686, + -0.02602405846118927, + -0.016220152378082275, + -0.006416246294975281, + 0.003387659788131714, + 0.013191565871238708, + 0.022995471954345703, + 0.0327993780374527, + 0.042603280395269394 + ] + } + }, + "transformer.layers.19.4.ff.2.weight": { + "min": -0.45756417512893677, + "max": 0.4861648976802826, + "mean": 4.3982381612295285e-05, + "std": 0.05422103777527809, + "abs_mean": 0.042265694588422775, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 111.03323364257812, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 2, + 0, + 4, + 10, + 17, + 55, + 56, + 129, + 151, + 178, + 163, + 111, + 72, + 34, + 10, + 5, + 0, + 1, + 1 + ], + "bin_edges": [ + -0.23315100371837616, + -0.21043646335601807, + -0.18772193789482117, + -0.16500739753246307, + -0.14229285717010498, + -0.11957833170890808, + -0.09686379134654999, + -0.07414926588535309, + -0.051434725522994995, + -0.028720185160636902, + -0.0060056596994400024, + 0.01670888066291809, + 0.039423421025276184, + 0.062137946486473083, + 0.08485247194766998, + 0.10756702721118927, + 0.13028155267238617, + 0.15299607813358307, + 0.17571063339710236, + 0.19842515885829926, + 0.22113966941833496 + ] + } + }, + "transformer.layers.19.4.ff.2.bias": { + "min": -0.2858409285545349, + "max": 0.5508930087089539, + "mean": -0.0008807203266769648, + "std": 0.047792647033929825, + "abs_mean": 0.033110618591308594, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.5288777351379395, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 1, + 2, + 17, + 54, + 154, + 306, + 297, + 111, + 47, + 7, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.2858409285545349, + -0.253815621137619, + -0.22179031372070312, + -0.18976500630378723, + -0.15773969888687134, + -0.12571439146995544, + -0.09368908405303955, + -0.06166377663612366, + -0.029638469219207764, + 0.00238683819770813, + 0.03441214561462402, + 0.06643745303153992, + 0.09846276044845581, + 0.1304880678653717, + 0.1625133752822876, + 0.1945386826992035, + 0.22656399011611938, + 0.25858932733535767, + 0.29061460494995117, + 0.3226398825645447, + 0.35466521978378296 + ] + } + }, + "transformer.layers.20.0.weight": { + "min": -0.2925868332386017, + "max": 0.32265621423721313, + "mean": 6.008186119288439e-06, + "std": 0.0199727825820446, + "abs_mean": 0.015888523310422897, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 28.922727584838867, + "elements": 2097152, + "histogram": { + "counts": [ + 9, + 12, + 45, + 61, + 89, + 149, + 146, + 141, + 145, + 96, + 55, + 29, + 15, + 5, + 1, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.054424017667770386, + -0.04656292125582695, + -0.038701824843883514, + -0.030840732157230377, + -0.02297963574528694, + -0.015118539333343506, + -0.007257446646690369, + 0.000603649765253067, + 0.008464746177196503, + 0.01632583886384964, + 0.024186939001083374, + 0.03204803168773651, + 0.03990912437438965, + 0.04777022451162338, + 0.05563131719827652, + 0.06349241733551025, + 0.07135351002216339, + 0.07921460270881653, + 0.08707569539546967, + 0.094936802983284, + 0.10279788821935654 + ] + } + }, + "transformer.layers.20.1.g": { + "min": 0.2913132309913635, + "max": 0.7585903406143188, + "mean": 0.6507112979888916, + "std": 0.05193017050623894, + "abs_mean": 0.6507112979888916, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.88890266418457, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 0, + 0, + 1, + 2, + 3, + 4, + 10, + 12, + 15, + 20, + 40, + 77, + 147, + 229, + 251, + 132, + 46, + 9 + ], + "bin_edges": [ + 0.2913132309913635, + 0.31467708945274353, + 0.33804094791412354, + 0.36140477657318115, + 0.38476866483688354, + 0.40813249349594116, + 0.43149635195732117, + 0.45486021041870117, + 0.4782240688800812, + 0.5015879273414612, + 0.5249518156051636, + 0.5483156442642212, + 0.5716794729232788, + 0.5950433015823364, + 0.6184071898460388, + 0.6417710781097412, + 0.6651349067687988, + 0.6884987354278564, + 0.7118626236915588, + 0.7352265119552612, + 0.7585903406143188 + ] + } + }, + "transformer.layers.20.2.to_q.weight": { + "min": -0.24352194368839264, + "max": 0.26151588559150696, + "mean": -5.6967542150232475e-06, + "std": 0.03961416333913803, + "abs_mean": 0.03143656626343727, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.564388275146484, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 2, + 5, + 10, + 28, + 41, + 73, + 99, + 142, + 174, + 144, + 94, + 80, + 57, + 22, + 15, + 2, + 3, + 4 + ], + "bin_edges": [ + -0.15721261501312256, + -0.14256204664707184, + -0.1279114931821823, + -0.11326092481613159, + -0.09861036390066147, + -0.08395980298519135, + -0.06930923461914062, + -0.0546586737036705, + -0.04000811278820038, + -0.025357544422149658, + -0.010706990957260132, + 0.003943577408790588, + 0.01859414577484131, + 0.033244699239730835, + 0.047895267605781555, + 0.06254582107067108, + 0.0771963894367218, + 0.09184695780277252, + 0.10649752616882324, + 0.12114807963371277, + 0.1357986181974411 + ] + } + }, + "transformer.layers.20.2.to_q.bias": { + "min": -0.26712363958358765, + "max": 0.19983239471912384, + "mean": -0.0008771903812885284, + "std": 0.0517287477850914, + "abs_mean": 0.03703365474939346, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.6547496318817139, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 3, + 6, + 9, + 10, + 24, + 49, + 102, + 196, + 252, + 165, + 92, + 42, + 21, + 12, + 2, + 10, + 4 + ], + "bin_edges": [ + -0.26712363958358765, + -0.2437758445739746, + -0.22042803466320038, + -0.19708023965358734, + -0.1737324297428131, + -0.15038463473320007, + -0.12703683972358704, + -0.1036890298128128, + -0.08034123480319977, + -0.05699343979358673, + -0.0336456298828125, + -0.010297834873199463, + 0.013049960136413574, + 0.03639775514602661, + 0.059745579957962036, + 0.08309337496757507, + 0.10644116997718811, + 0.12978896498680115, + 0.15313675999641418, + 0.1764845848083496, + 0.19983239471912384 + ] + } + }, + "transformer.layers.20.2.to_k.weight": { + "min": -0.2718246877193451, + "max": 0.25335949659347534, + "mean": 5.239124220679514e-06, + "std": 0.03871086984872818, + "abs_mean": 0.030697766691446304, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.639469146728516, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 7, + 13, + 20, + 60, + 71, + 100, + 142, + 121, + 141, + 116, + 84, + 55, + 31, + 19, + 9, + 6, + 0, + 1 + ], + "bin_edges": [ + -0.1246657595038414, + -0.11169023811817169, + -0.09871471673250198, + -0.08573919534683228, + -0.07276367396116257, + -0.05978815257549286, + -0.04681263118982315, + -0.03383710980415344, + -0.020861588418483734, + -0.007886067032814026, + 0.005089454352855682, + 0.018064983189105988, + 0.0310404971241951, + 0.04401601105928421, + 0.056991539895534515, + 0.06996706873178482, + 0.08294258266687393, + 0.09591809660196304, + 0.10889362543821335, + 0.12186915427446365, + 0.13484467566013336 + ] + } + }, + "transformer.layers.20.2.to_k.bias": { + "min": -12.94522476196289, + "max": 15.922240257263184, + "mean": 0.03318937495350838, + "std": 1.9867888689041138, + "abs_mean": 1.0522326231002808, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 63.5550651550293, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 2, + 4, + 6, + 5, + 17, + 62, + 421, + 384, + 56, + 15, + 9, + 5, + 2, + 6, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + -12.94522476196289, + -11.501851081848145, + -10.058478355407715, + -8.615104675292969, + -7.171731472015381, + -5.728358268737793, + -4.284984588623047, + -2.841611862182617, + -1.398238182067871, + 0.045135498046875, + 1.4885082244873047, + 2.931881904602051, + 4.375255584716797, + 5.818628311157227, + 7.262001037597656, + 8.705375671386719, + 10.148748397827148, + 11.592121124267578, + 13.03549575805664, + 14.47886848449707, + 15.922240257263184 + ] + } + }, + "transformer.layers.20.2.to_v.weight": { + "min": -0.20649555325508118, + "max": 0.22559243440628052, + "mean": -7.256461685756221e-05, + "std": 0.040558841079473495, + "abs_mean": 0.03210737928748131, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.531700134277344, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 1, + 8, + 18, + 38, + 64, + 99, + 114, + 132, + 137, + 112, + 109, + 82, + 42, + 22, + 6, + 5, + 4, + 2 + ], + "bin_edges": [ + -0.14026805758476257, + -0.12651239335536957, + -0.11275672912597656, + -0.09900107234716415, + -0.08524540811777115, + -0.07148974388837814, + -0.057734087109565735, + -0.04397842288017273, + -0.030222758650779724, + -0.01646709442138672, + -0.0027114301919937134, + 0.011044234037399292, + 0.024799883365631104, + 0.03855554759502411, + 0.052311211824417114, + 0.06606687605381012, + 0.07982254028320312, + 0.09357820451259613, + 0.10733386874198914, + 0.12108951807022095, + 0.13484519720077515 + ] + } + }, + "transformer.layers.20.2.to_v.bias": { + "min": -0.06932304799556732, + "max": 0.06304260343313217, + "mean": 0.0001579949603183195, + "std": 0.014740646816790104, + "abs_mean": 0.011740190908312798, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.47149741649627686, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 0, + 1, + 7, + 25, + 99, + 135, + 159, + 159, + 157, + 137, + 71, + 26, + 10, + 5, + 1, + 2, + 3 + ], + "bin_edges": [ + -0.06932304799556732, + -0.06270476430654526, + -0.05608648434281349, + -0.04946820065379143, + -0.04284992069005966, + -0.0362316370010376, + -0.029613353312015533, + -0.022995073348283768, + -0.016376789659261703, + -0.00975850597023964, + -0.0031402260065078735, + 0.0034780576825141907, + 0.010096341371536255, + 0.01671462506055832, + 0.023332901298999786, + 0.02995118498802185, + 0.036569468677043915, + 0.04318775236606598, + 0.04980603605508804, + 0.05642431974411011, + 0.06304260343313217 + ] + } + }, + "transformer.layers.20.2.to_out.0.weight": { + "min": -0.4653640687465668, + "max": 0.3200652003288269, + "mean": 1.952598540810868e-05, + "std": 0.04059439152479172, + "abs_mean": 0.032202161848545074, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.56819152832031, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 4, + 0, + 11, + 21, + 27, + 66, + 88, + 111, + 134, + 119, + 140, + 93, + 64, + 61, + 29, + 15, + 10, + 5 + ], + "bin_edges": [ + -0.15654049813747406, + -0.1427799016237259, + -0.12901930510997772, + -0.11525871604681015, + -0.10149811953306198, + -0.08773752301931381, + -0.07397693395614624, + -0.06021633744239807, + -0.0464557409286499, + -0.03269514441490173, + -0.018934547901153564, + -0.0051739513874053955, + 0.00858663022518158, + 0.02234722673892975, + 0.03610782325267792, + 0.049868419766426086, + 0.06362901628017426, + 0.07738961279392242, + 0.0911502093076706, + 0.10491080582141876, + 0.11867139488458633 + ] + } + }, + "transformer.layers.20.2.to_out.0.bias": { + "min": -0.06398282200098038, + "max": 0.11537733674049377, + "mean": 0.0011978133115917444, + "std": 0.02469516545534134, + "abs_mean": 0.019984934478998184, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.7907887697219849, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 23, + 26, + 66, + 101, + 118, + 134, + 139, + 117, + 103, + 81, + 46, + 24, + 11, + 3, + 2, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.06398282200098038, + -0.05501481518149376, + -0.04604680836200714, + -0.037078797817230225, + -0.028110790997743607, + -0.01914278417825699, + -0.010174773633480072, + -0.0012067705392837524, + 0.007761240005493164, + 0.01672925055027008, + 0.0256972536444664, + 0.03466526418924332, + 0.04363327473402023, + 0.05260127782821655, + 0.06156928092241287, + 0.07053729146718979, + 0.0795053020119667, + 0.08847331255674362, + 0.09744132310152054, + 0.10640931874513626, + 0.11537733674049377 + ] + } + }, + "transformer.layers.20.3.g": { + "min": 0.3749999403953552, + "max": 0.9300609230995178, + "mean": 0.7510109543800354, + "std": 0.040018972009420395, + "abs_mean": 0.7510109543800354, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 24.0664119720459, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 1, + 0, + 4, + 1, + 4, + 1, + 4, + 6, + 11, + 21, + 149, + 467, + 262, + 53, + 8, + 5, + 0, + 2 + ], + "bin_edges": [ + 0.3749999403953552, + 0.40275299549102783, + 0.43050605058670044, + 0.45825910568237305, + 0.48601213097572327, + 0.5137652158737183, + 0.5415182113647461, + 0.5692712664604187, + 0.5970243215560913, + 0.6247773766517639, + 0.6525304317474365, + 0.6802834868431091, + 0.7080365419387817, + 0.7357895970344543, + 0.763542652130127, + 0.7912956476211548, + 0.8190487623214722, + 0.8468017578125, + 0.8745548725128174, + 0.9023078680038452, + 0.9300609230995178 + ] + } + }, + "transformer.layers.20.4.ff.0.0.weight": { + "min": -0.27868181467056274, + "max": 0.27277180552482605, + "mean": -0.00016834630514495075, + "std": 0.041004978120326996, + "abs_mean": 0.03246169537305832, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 83.97103881835938, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 6, + 8, + 17, + 36, + 62, + 93, + 121, + 129, + 139, + 115, + 111, + 56, + 45, + 27, + 16, + 8, + 3, + 3, + 1 + ], + "bin_edges": [ + -0.1280173808336258, + -0.11423726379871368, + -0.10045714676380157, + -0.08667702972888947, + -0.07289691269397736, + -0.05911679565906525, + -0.04533667862415314, + -0.03155656158924103, + -0.01777644455432892, + -0.003996327519416809, + 0.0097837895154953, + 0.02356390655040741, + 0.03734402358531952, + 0.05112414062023163, + 0.06490425765514374, + 0.07868437469005585, + 0.09246449172496796, + 0.10624460875988007, + 0.12002472579479218, + 0.13380484282970428, + 0.1475849598646164 + ] + } + }, + "transformer.layers.20.4.ff.0.0.bias": { + "min": -0.19812321662902832, + "max": 0.05135354399681091, + "mean": -0.032012395560741425, + "std": 0.025048717856407166, + "abs_mean": 0.03351156413555145, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.6013293266296387, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 1, + 0, + 2, + 1, + 2, + 4, + 6, + 11, + 22, + 53, + 116, + 184, + 203, + 192, + 129, + 53, + 16, + 2, + 2 + ], + "bin_edges": [ + -0.19812321662902832, + -0.185712069272995, + -0.17330093681812286, + -0.16088978946208954, + -0.1484786570072174, + -0.13606750965118408, + -0.12365636229515076, + -0.11124522238969803, + -0.0988340824842453, + -0.08642294257879257, + -0.07401180267333984, + -0.06160065531730652, + -0.04918950796127319, + -0.03677837550640106, + -0.024367228150367737, + -0.011956095695495605, + 0.0004550516605377197, + 0.012866199016571045, + 0.025277331471443176, + 0.0376884788274765, + 0.05009962618350983 + ] + } + }, + "transformer.layers.20.4.ff.2.weight": { + "min": -0.65754234790802, + "max": 0.5349372029304504, + "mean": -5.049940591561608e-05, + "std": 0.052857208997011185, + "abs_mean": 0.04115595668554306, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 108.23979187011719, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 2, + 7, + 8, + 13, + 37, + 56, + 111, + 121, + 179, + 164, + 129, + 83, + 42, + 22, + 11, + 10, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.2004147469997406, + -0.1797032356262207, + -0.158991739153862, + -0.1382802277803421, + -0.1175687238574028, + -0.0968572199344635, + -0.0761457085609436, + -0.0554342120885849, + -0.034722700715065, + -0.014011189341545105, + 0.006700307130813599, + 0.027411818504333496, + 0.048123329877853394, + 0.06883484125137329, + 0.0895463228225708, + 0.1102578341960907, + 0.1309693455696106, + 0.1516808569431305, + 0.1723923683166504, + 0.1931038498878479, + 0.213815376162529 + ] + } + }, + "transformer.layers.20.4.ff.2.bias": { + "min": -0.1923648864030838, + "max": 0.5813060998916626, + "mean": -0.0005128913326188922, + "std": 0.041049525141716, + "abs_mean": 0.028079785406589508, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.3130457401275635, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 2, + 11, + 116, + 410, + 357, + 81, + 15, + 3, + 0, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.1923648864030838, + -0.15368133783340454, + -0.11499778926372528, + -0.07631424069404602, + -0.03763069212436676, + 0.0010528564453125, + 0.03973640501499176, + 0.07841996848583221, + 0.11710350215435028, + 0.15578703582286835, + 0.1944705992937088, + 0.23315416276454926, + 0.2718377113342285, + 0.3105212450027466, + 0.3492048382759094, + 0.3878883719444275, + 0.42657190561294556, + 0.4652554392814636, + 0.5039389729499817, + 0.5426225662231445, + 0.5813060998916626 + ] + } + }, + "transformer.layers.21.0.weight": { + "min": -0.417529821395874, + "max": 0.3719121813774109, + "mean": 6.524643140437547e-06, + "std": 0.021627992391586304, + "abs_mean": 0.017142053693532944, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 31.3196964263916, + "elements": 2097152, + "histogram": { + "counts": [ + 1, + 0, + 2, + 8, + 12, + 40, + 68, + 97, + 154, + 148, + 133, + 139, + 86, + 64, + 23, + 15, + 4, + 3, + 1, + 2 + ], + "bin_edges": [ + -0.08539692312479019, + -0.07676888257265091, + -0.06814083456993103, + -0.05951279401779175, + -0.05088474974036217, + -0.04225670546293259, + -0.033628664910793304, + -0.025000620633363724, + -0.016372576355934143, + -0.007744535803794861, + 0.0008835121989250183, + 0.0095115527510643, + 0.018139593303203583, + 0.026767641305923462, + 0.035395681858062744, + 0.04402372986078262, + 0.052651770412921906, + 0.06127981096506119, + 0.06990785151720047, + 0.07853590697050095, + 0.08716394007205963 + ] + } + }, + "transformer.layers.21.1.g": { + "min": 0.21460720896720886, + "max": 0.7452309131622314, + "mean": 0.6493626832962036, + "std": 0.054172683507204056, + "abs_mean": 0.6493626832962036, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 20.85171890258789, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 0, + 0, + 2, + 4, + 1, + 4, + 3, + 6, + 6, + 20, + 35, + 57, + 175, + 270, + 260, + 126, + 28 + ], + "bin_edges": [ + 0.21460720896720886, + 0.24113839864730835, + 0.26766958832740784, + 0.2942007780075073, + 0.3207319378852844, + 0.3472631275653839, + 0.3737943172454834, + 0.4003254771232605, + 0.4268566966056824, + 0.4533878564834595, + 0.47991904616355896, + 0.5064502358436584, + 0.5329813957214355, + 0.5595126152038574, + 0.5860437750816345, + 0.6125749349594116, + 0.6391061544418335, + 0.6656373739242554, + 0.6921685338020325, + 0.7186996936798096, + 0.7452309131622314 + ] + } + }, + "transformer.layers.21.2.to_q.weight": { + "min": -0.20914840698242188, + "max": 0.19524669647216797, + "mean": 4.0109844121616334e-05, + "std": 0.03945964202284813, + "abs_mean": 0.031188296154141426, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.406219482421875, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 14, + 18, + 29, + 46, + 68, + 84, + 95, + 119, + 139, + 100, + 97, + 69, + 53, + 22, + 20, + 14, + 7, + 2 + ], + "bin_edges": [ + -0.12001488357782364, + -0.10817231237888336, + -0.09632974117994308, + -0.08448716998100281, + -0.07264459878206253, + -0.06080202758312225, + -0.048959456384181976, + -0.0371168851852417, + -0.025274313986301422, + -0.013431742787361145, + -0.001589171588420868, + 0.010253392159938812, + 0.022095970809459686, + 0.03393854945898056, + 0.04578111320734024, + 0.05762367695569992, + 0.0694662556052208, + 0.08130883425474167, + 0.09315139800310135, + 0.10499396175146103, + 0.1168365478515625 + ] + } + }, + "transformer.layers.21.2.to_q.bias": { + "min": -0.32907912135124207, + "max": 0.25925326347351074, + "mean": -0.003227418288588524, + "std": 0.05623279884457588, + "abs_mean": 0.039593495428562164, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.8015334606170654, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 4, + 10, + 17, + 29, + 43, + 111, + 235, + 293, + 148, + 56, + 29, + 14, + 2, + 5, + 0, + 2 + ], + "bin_edges": [ + -0.32907912135124207, + -0.2996625006198883, + -0.27024587988853455, + -0.2408292591571808, + -0.21141263842582703, + -0.18199601769447327, + -0.1525793969631195, + -0.12316277623176575, + -0.09374615550041199, + -0.06432953476905823, + -0.03491291403770447, + -0.005496293306350708, + 0.023920327425003052, + 0.05333694815635681, + 0.08275356888771057, + 0.11217018961906433, + 0.1415868103504181, + 0.17100343108177185, + 0.2004200518131256, + 0.22983667254447937, + 0.25925326347351074 + ] + } + }, + "transformer.layers.21.2.to_k.weight": { + "min": -0.20563212037086487, + "max": 0.25434860587120056, + "mean": 5.404070907388814e-05, + "std": 0.038562316447496414, + "abs_mean": 0.030502334237098694, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.487403869628906, + "elements": 1048576, + "histogram": { + "counts": [ + 4, + 3, + 21, + 18, + 41, + 70, + 91, + 129, + 118, + 142, + 115, + 78, + 58, + 59, + 24, + 14, + 10, + 3, + 1, + 1 + ], + "bin_edges": [ + -0.11593008786439896, + -0.10307550430297852, + -0.09022092819213867, + -0.07736634463071823, + -0.06451176106929779, + -0.05165718495845795, + -0.038802601397037506, + -0.025948025286197662, + -0.013093441724777222, + -0.000238858163356781, + 0.012615717947483063, + 0.025470294058322906, + 0.038324885070323944, + 0.05117946118116379, + 0.06403403729200363, + 0.07688862830400467, + 0.08974320441484451, + 0.10259778052568436, + 0.1154523715376854, + 0.12830695509910583, + 0.14116151630878448 + ] + } + }, + "transformer.layers.21.2.to_k.bias": { + "min": -6.2339768409729, + "max": 6.921723365783691, + "mean": 0.04828859120607376, + "std": 1.383695363998413, + "abs_mean": 0.8800080418586731, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 44.28359603881836, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 3, + 4, + 8, + 11, + 11, + 35, + 68, + 181, + 348, + 192, + 63, + 33, + 14, + 4, + 6, + 7, + 4, + 5, + 2 + ], + "bin_edges": [ + -6.2339768409729, + -5.5761919021606445, + -4.9184064865112305, + -4.260621547698975, + -3.6028366088867188, + -2.945051670074463, + -2.287266492843628, + -1.629481315612793, + -0.9716963768005371, + -0.31391143798828125, + 0.3438735008239746, + 1.0016589164733887, + 1.6594438552856445, + 2.3172287940979004, + 2.9750142097473145, + 3.632798671722412, + 4.290584087371826, + 4.94836950302124, + 5.606153964996338, + 6.263939380645752, + 6.921723365783691 + ] + } + }, + "transformer.layers.21.2.to_v.weight": { + "min": -0.20957675576210022, + "max": 0.23022468388080597, + "mean": -4.7416378947673365e-06, + "std": 0.04131784662604332, + "abs_mean": 0.03262507542967796, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 42.30889129638672, + "elements": 1048576, + "histogram": { + "counts": [ + 6, + 6, + 17, + 36, + 57, + 64, + 116, + 124, + 140, + 130, + 100, + 95, + 49, + 27, + 14, + 12, + 4, + 2, + 0, + 1 + ], + "bin_edges": [ + -0.12215577065944672, + -0.10773155093193054, + -0.09330733865499496, + -0.07888311892747879, + -0.06445890665054321, + -0.05003468692302704, + -0.035610467195510864, + -0.021186254918575287, + -0.0067620351910591125, + 0.007662177085876465, + 0.02208639681339264, + 0.036510616540908813, + 0.05093483626842499, + 0.06535905599594116, + 0.07978326082229614, + 0.09420748054981232, + 0.10863170027732849, + 0.12305592000484467, + 0.13748012483119965, + 0.15190435945987701, + 0.1663285791873932 + ] + } + }, + "transformer.layers.21.2.to_v.bias": { + "min": -0.043760623782873154, + "max": 0.03593071922659874, + "mean": -6.6086213337257504e-06, + "std": 0.012794941663742065, + "abs_mean": 0.01062579732388258, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.40923821926116943, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 6, + 14, + 28, + 68, + 87, + 103, + 90, + 105, + 103, + 104, + 91, + 86, + 54, + 40, + 12, + 3, + 3 + ], + "bin_edges": [ + -0.043760623782873154, + -0.03977605700492859, + -0.035791490226984024, + -0.03180692344903946, + -0.027822354808449745, + -0.02383778803050518, + -0.019853219389915466, + -0.0158686526119709, + -0.011884085834026337, + -0.007899519056081772, + -0.003914952278137207, + 6.961449980735779e-05, + 0.004054185003042221, + 0.008038751780986786, + 0.01202331855893135, + 0.016007885336875916, + 0.01999245211482048, + 0.023977022618055344, + 0.02796158567070961, + 0.03194615617394447, + 0.03593071922659874 + ] + } + }, + "transformer.layers.21.2.to_out.0.weight": { + "min": -0.3974460959434509, + "max": 0.3449029326438904, + "mean": -5.5259803048102185e-05, + "std": 0.0423947237432003, + "abs_mean": 0.03332170099020004, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 43.41182327270508, + "elements": 1048576, + "histogram": { + "counts": [ + 4, + 10, + 9, + 26, + 33, + 67, + 65, + 109, + 147, + 115, + 150, + 104, + 69, + 49, + 22, + 8, + 6, + 3, + 2, + 2 + ], + "bin_edges": [ + -0.130792036652565, + -0.11647318303585052, + -0.10215433686971664, + -0.08783548325300217, + -0.07351663708686829, + -0.05919778347015381, + -0.04487892985343933, + -0.03056008368730545, + -0.016241230070590973, + -0.0019223839044570923, + 0.012396469712257385, + 0.026715323328971863, + 0.04103417694568634, + 0.05535303056240082, + 0.0696718692779541, + 0.08399072289466858, + 0.09830957651138306, + 0.11262843012809753, + 0.12694726884365082, + 0.1412661224603653, + 0.15558499097824097 + ] + } + }, + "transformer.layers.21.2.to_out.0.bias": { + "min": -0.055080167949199677, + "max": 0.06271716207265854, + "mean": 0.0003585012163966894, + "std": 0.018664730712771416, + "abs_mean": 0.01492932066321373, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.5970898866653442, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 3, + 16, + 28, + 39, + 53, + 71, + 106, + 116, + 132, + 118, + 107, + 77, + 57, + 37, + 19, + 11, + 4, + 1, + 1 + ], + "bin_edges": [ + -0.055080167949199677, + -0.049190301448106766, + -0.043300434947013855, + -0.037410568445920944, + -0.03152070194482803, + -0.025630835443735123, + -0.019740968942642212, + -0.013851102441549301, + -0.00796123594045639, + -0.0020713694393634796, + 0.003818497061729431, + 0.00970836728811264, + 0.015598230063915253, + 0.021488092839717865, + 0.027377963066101074, + 0.03326783329248428, + 0.039157696068286896, + 0.04504755884408951, + 0.05093742907047272, + 0.056827299296855927, + 0.06271716207265854 + ] + } + }, + "transformer.layers.21.3.g": { + "min": 0.3508152663707733, + "max": 1.0430189371109009, + "mean": 0.789574146270752, + "std": 0.048565711826086044, + "abs_mean": 0.789574146270752, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 25.314075469970703, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 1, + 2, + 0, + 1, + 4, + 3, + 2, + 8, + 22, + 141, + 390, + 360, + 50, + 6, + 3, + 1, + 1, + 2 + ], + "bin_edges": [ + 0.3508152663707733, + 0.3854254484176636, + 0.42003563046455383, + 0.4546458125114441, + 0.48925599455833435, + 0.5238661766052246, + 0.5584763288497925, + 0.5930865406990051, + 0.6276967525482178, + 0.6623069047927856, + 0.6969170570373535, + 0.7315272688865662, + 0.7661374807357788, + 0.8007476329803467, + 0.8353577852249146, + 0.869968056678772, + 0.9045782089233398, + 0.9391883611679077, + 0.9737985134124756, + 1.008408784866333, + 1.0430189371109009 + ] + } + }, + "transformer.layers.21.4.ff.0.0.weight": { + "min": -0.3336288034915924, + "max": 0.38612979650497437, + "mean": -0.00016904372023418546, + "std": 0.041490498930215836, + "abs_mean": 0.032748062163591385, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 84.96529388427734, + "elements": 4194304, + "histogram": { + "counts": [ + 6, + 8, + 14, + 28, + 56, + 74, + 111, + 133, + 137, + 145, + 99, + 86, + 49, + 25, + 13, + 8, + 2, + 2, + 1, + 3 + ], + "bin_edges": [ + -0.12654127180576324, + -0.1117871031165123, + -0.09703293442726135, + -0.08227875828742981, + -0.06752458959817886, + -0.05277042090892792, + -0.038016244769096375, + -0.02326207607984543, + -0.008507907390594482, + 0.0062462687492370605, + 0.02100042998790741, + 0.03575460612773895, + 0.050508782267570496, + 0.06526294350624084, + 0.08001711964607239, + 0.09477128088474274, + 0.10952545702457428, + 0.12427963316440582, + 0.13903380930423737, + 0.15378795564174652, + 0.16854214668273926 + ] + } + }, + "transformer.layers.21.4.ff.0.0.bias": { + "min": -0.15726615488529205, + "max": 0.05897233635187149, + "mean": -0.031808022409677505, + "std": 0.02507229521870613, + "abs_mean": 0.03385263308882713, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.5919737815856934, + "elements": 4096, + "histogram": { + "counts": [ + 2, + 2, + 4, + 8, + 10, + 13, + 19, + 65, + 70, + 132, + 125, + 138, + 133, + 98, + 79, + 53, + 28, + 14, + 5, + 2 + ], + "bin_edges": [ + -0.12798060476779938, + -0.11949160695075989, + -0.1110026091337204, + -0.10251360386610031, + -0.09402460604906082, + -0.08553560823202133, + -0.07704660296440125, + -0.06855760514736176, + -0.060068607330322266, + -0.051579609513282776, + -0.043090611696243286, + -0.0346016064286232, + -0.02611260861158371, + -0.01762361079454422, + -0.009134605526924133, + -0.0006456077098846436, + 0.007843390107154846, + 0.016332387924194336, + 0.024821385741233826, + 0.033310383558273315, + 0.041799396276474 + ] + } + }, + "transformer.layers.21.4.ff.2.weight": { + "min": -0.6961155533790588, + "max": 0.4685930609703064, + "mean": -8.521115523763001e-05, + "std": 0.05180642008781433, + "abs_mean": 0.0401139073073864, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 106.08828735351562, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 3, + 4, + 9, + 27, + 46, + 87, + 141, + 190, + 184, + 148, + 84, + 43, + 20, + 8, + 2, + 0, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.21053080260753632, + -0.18716660141944885, + -0.16380241513252258, + -0.14043821394443512, + -0.11707401275634766, + -0.09370981156826019, + -0.07034562528133392, + -0.04698142409324646, + -0.023617222905158997, + -0.0002530217170715332, + 0.02311117947101593, + 0.0464753657579422, + 0.06983955204486847, + 0.09320376813411713, + 0.1165679544210434, + 0.13993217051029205, + 0.16329635679721832, + 0.1866605430841446, + 0.21002475917339325, + 0.23338894546031952, + 0.25675317645072937 + ] + } + }, + "transformer.layers.21.4.ff.2.bias": { + "min": -0.24746476113796234, + "max": 0.32834842801094055, + "mean": -0.00026278701261617243, + "std": 0.041423212736845016, + "abs_mean": 0.030439719557762146, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.3249220848083496, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 2, + 3, + 21, + 79, + 228, + 305, + 227, + 103, + 20, + 6, + 0, + 2, + 1, + 0, + 0, + 0, + 2 + ], + "bin_edges": [ + -0.24746476113796234, + -0.21867410838603973, + -0.18988344073295593, + -0.16109278798103333, + -0.13230213522911072, + -0.10351146757602692, + -0.07472081482410431, + -0.04593014717102051, + -0.0171394944190979, + 0.011651173233985901, + 0.04044182598590851, + 0.06923247873783112, + 0.09802313148975372, + 0.12681378424167633, + 0.15560446679592133, + 0.18439511954784393, + 0.21318577229976654, + 0.24197642505168915, + 0.27076709270477295, + 0.29955774545669556, + 0.32834842801094055 + ] + } + }, + "transformer.layers.22.0.weight": { + "min": -0.2869253158569336, + "max": 0.35028234124183655, + "mean": -2.780619524855865e-06, + "std": 0.02424117736518383, + "abs_mean": 0.019041862338781357, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 35.103759765625, + "elements": 2097152, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 1, + 1, + 7, + 13, + 24, + 73, + 106, + 207, + 207, + 171, + 95, + 56, + 30, + 4, + 3, + 1 + ], + "bin_edges": [ + -0.14472992718219757, + -0.13296078145503998, + -0.12119164317846298, + -0.10942250490188599, + -0.0976533591747284, + -0.0858842208981514, + -0.0741150826215744, + -0.06234593689441681, + -0.05057679861783981, + -0.03880766034126282, + -0.027038514614105225, + -0.015269368886947632, + -0.003500238060951233, + 0.00826890766620636, + 0.020038053393363953, + 0.03180718421936035, + 0.043576329946517944, + 0.05534547567367554, + 0.06711460649967194, + 0.07888375222682953, + 0.09065289795398712 + ] + } + }, + "transformer.layers.22.1.g": { + "min": 0.1968069076538086, + "max": 0.7775169014930725, + "mean": 0.6701230406761169, + "std": 0.058515764772892, + "abs_mean": 0.6701230406761169, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 21.525455474853516, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 0, + 0, + 1, + 1, + 1, + 0, + 3, + 9, + 6, + 5, + 21, + 35, + 84, + 153, + 283, + 270, + 111, + 14 + ], + "bin_edges": [ + 0.1968069076538086, + 0.2258424013853073, + 0.25487789511680603, + 0.28391340374946594, + 0.31294891238212585, + 0.3419843912124634, + 0.3710198998451233, + 0.4000554084777832, + 0.4290909171104431, + 0.45812639594078064, + 0.48716190457344055, + 0.5161974430084229, + 0.545232892036438, + 0.5742684006690979, + 0.6033039093017578, + 0.632339358329773, + 0.6613749265670776, + 0.6904103755950928, + 0.7194458842277527, + 0.7484813928604126, + 0.7775169014930725 + ] + } + }, + "transformer.layers.22.2.to_q.weight": { + "min": -0.2286878526210785, + "max": 0.23117558658123016, + "mean": -2.085552659991663e-05, + "std": 0.04044000059366226, + "abs_mean": 0.03183070570230484, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.409976959228516, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 6, + 7, + 13, + 34, + 52, + 64, + 94, + 125, + 135, + 131, + 117, + 78, + 60, + 36, + 26, + 10, + 4, + 6 + ], + "bin_edges": [ + -0.145659938454628, + -0.1320401132106781, + -0.1184203028678894, + -0.10480047762393951, + -0.09118065983057022, + -0.07756084203720093, + -0.06394101679325104, + -0.050321198999881744, + -0.03670138120651245, + -0.023081563413143158, + -0.009461745619773865, + 0.004158079624176025, + 0.017777904868125916, + 0.03139771521091461, + 0.0450175404548645, + 0.0586373507976532, + 0.07225717604160309, + 0.08587700128555298, + 0.09949681162834167, + 0.11311663687229156, + 0.12673646211624146 + ] + } + }, + "transformer.layers.22.2.to_q.bias": { + "min": -0.2196890264749527, + "max": 0.24058501422405243, + "mean": 0.0007775035337544978, + "std": 0.05580567941069603, + "abs_mean": 0.04114898666739464, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.7850829362869263, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 3, + 10, + 13, + 18, + 71, + 103, + 161, + 207, + 180, + 105, + 50, + 39, + 12, + 9, + 8, + 2, + 2, + 3 + ], + "bin_edges": [ + -0.2196890264749527, + -0.19667533040046692, + -0.17366161942481995, + -0.15064792335033417, + -0.1276342272758484, + -0.10462051630020142, + -0.08160682022571564, + -0.058593109250068665, + -0.035579413175582886, + -0.012565717101097107, + 0.010447993874549866, + 0.03346170485019684, + 0.05647538602352142, + 0.0794890969991684, + 0.10250280797481537, + 0.12551648914813995, + 0.14853020012378693, + 0.1715439110994339, + 0.19455759227275848, + 0.21757130324840546, + 0.24058501422405243 + ] + } + }, + "transformer.layers.22.2.to_k.weight": { + "min": -0.21652470529079437, + "max": 0.2261732518672943, + "mean": -7.23175035091117e-05, + "std": 0.03937419131398201, + "abs_mean": 0.03104310855269432, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.318748474121094, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 10, + 18, + 24, + 40, + 56, + 86, + 104, + 108, + 130, + 96, + 102, + 97, + 54, + 37, + 22, + 7, + 2, + 2, + 2 + ], + "bin_edges": [ + -0.11781381815671921, + -0.1056215912103653, + -0.09342936426401138, + -0.08123713731765747, + -0.06904491037130356, + -0.056852683424949646, + -0.044660456478595734, + -0.03246822953224182, + -0.02027600258588791, + -0.008083775639533997, + 0.004108451306819916, + 0.016300685703754425, + 0.02849290519952774, + 0.040685124695301056, + 0.052877359092235565, + 0.06506959348917007, + 0.07726181298494339, + 0.0894540324807167, + 0.10164626687765121, + 0.11383850127458572, + 0.12603072822093964 + ] + } + }, + "transformer.layers.22.2.to_k.bias": { + "min": -8.891955375671387, + "max": 9.054566383361816, + "mean": -0.0012135691940784454, + "std": 1.846129059791565, + "abs_mean": 1.065543293952942, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 59.04728698730469, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 3, + 5, + 10, + 7, + 16, + 16, + 42, + 113, + 464, + 184, + 56, + 34, + 10, + 15, + 6, + 4, + 7, + 3, + 1 + ], + "bin_edges": [ + -8.464720726013184, + -7.588756561279297, + -6.712791919708252, + -5.836827754974365, + -4.96086311340332, + -4.084898948669434, + -3.208934783935547, + -2.332970142364502, + -1.4570059776306152, + -0.5810418128967285, + 0.2949228286743164, + 1.1708869934082031, + 2.04685115814209, + 2.9228153228759766, + 3.7987804412841797, + 4.674744606018066, + 5.550708770751953, + 6.42667293548584, + 7.302637100219727, + 8.178601264953613, + 9.054566383361816 + ] + } + }, + "transformer.layers.22.2.to_v.weight": { + "min": -0.2690034806728363, + "max": 0.25858405232429504, + "mean": 4.355451528681442e-05, + "std": 0.03841076418757439, + "abs_mean": 0.030269136652350426, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.33214569091797, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 2, + 7, + 7, + 14, + 24, + 54, + 84, + 113, + 124, + 138, + 148, + 99, + 74, + 46, + 35, + 20, + 8, + 1, + 1 + ], + "bin_edges": [ + -0.133734330534935, + -0.12091077864170074, + -0.1080872192978859, + -0.09526366740465164, + -0.08244010806083679, + -0.06961655616760254, + -0.056793004274368286, + -0.043969444930553436, + -0.031145893037319183, + -0.01832234114408493, + -0.005498781800270081, + 0.007324770092964172, + 0.020148321986198425, + 0.03297187387943268, + 0.045795440673828125, + 0.05861899256706238, + 0.07144254446029663, + 0.08426609635353088, + 0.09708964824676514, + 0.10991321504116058, + 0.12273675948381424 + ] + } + }, + "transformer.layers.22.2.to_v.bias": { + "min": -0.057884324342012405, + "max": 0.05789237469434738, + "mean": 0.0003543176280800253, + "std": 0.014708762988448143, + "abs_mean": 0.011816874146461487, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4705871641635895, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 0, + 6, + 12, + 23, + 58, + 108, + 148, + 138, + 140, + 128, + 115, + 80, + 18, + 11, + 5, + 4, + 2, + 1 + ], + "bin_edges": [ + -0.057884324342012405, + -0.05209548771381378, + -0.04630665481090546, + -0.04051781818270683, + -0.03472898155450821, + -0.028940148651599884, + -0.02315131202340126, + -0.017362479120492935, + -0.011573642492294312, + -0.005784805864095688, + 4.027038812637329e-06, + 0.0057928599417209625, + 0.011581700295209885, + 0.01737053319811821, + 0.023159366101026535, + 0.028948206454515457, + 0.03473703935742378, + 0.04052587226033211, + 0.04631471261382103, + 0.052103545516729355, + 0.05789237469434738 + ] + } + }, + "transformer.layers.22.2.to_out.0.weight": { + "min": -0.263511061668396, + "max": 0.288027822971344, + "mean": -6.177674367791042e-05, + "std": 0.03907754644751549, + "abs_mean": 0.030490010976791382, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 40.01486587524414, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 5, + 8, + 12, + 10, + 35, + 65, + 103, + 136, + 154, + 157, + 130, + 73, + 45, + 39, + 8, + 11, + 3, + 2, + 2 + ], + "bin_edges": [ + -0.14389725029468536, + -0.1291889250278473, + -0.11448058485984802, + -0.09977225214242935, + -0.08506391942501068, + -0.07035558670759201, + -0.05564725399017334, + -0.04093892127275467, + -0.026230588555336, + -0.011522248387336731, + 0.0031860768795013428, + 0.017894402146339417, + 0.032602742314338684, + 0.04731108248233795, + 0.062019407749176025, + 0.0767277330160141, + 0.09143607318401337, + 0.10614441335201263, + 0.1208527535200119, + 0.13556106388568878, + 0.15026940405368805 + ] + } + }, + "transformer.layers.22.2.to_out.0.bias": { + "min": -0.044037725776433945, + "max": 0.037295691668987274, + "mean": -9.799870167626068e-05, + "std": 0.013339235447347164, + "abs_mean": 0.010626820847392082, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.426658570766449, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 2, + 7, + 8, + 19, + 33, + 46, + 71, + 96, + 128, + 117, + 111, + 111, + 83, + 61, + 50, + 26, + 12, + 12, + 5 + ], + "bin_edges": [ + -0.044037725776433945, + -0.03997105360031128, + -0.03590438514947891, + -0.03183771297335625, + -0.02777104265987873, + -0.023704372346401215, + -0.01963770017027855, + -0.015571029856801033, + -0.011504359543323517, + -0.0074376873672008514, + -0.0033710189163684845, + 0.0006956532597541809, + 0.004762325435876846, + 0.008828993886709213, + 0.012895666062831879, + 0.016962334513664246, + 0.02102900668978691, + 0.025095675140619278, + 0.029162351042032242, + 0.03322901949286461, + 0.037295691668987274 + ] + } + }, + "transformer.layers.22.3.g": { + "min": 0.339274525642395, + "max": 1.0903433561325073, + "mean": 0.8638954162597656, + "std": 0.06374805420637131, + "abs_mean": 0.8638954162597656, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 27.719741821289062, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 0, + 4, + 5, + 3, + 1, + 3, + 4, + 1, + 10, + 22, + 70, + 257, + 471, + 124, + 12, + 7, + 2, + 2 + ], + "bin_edges": [ + 0.339274525642395, + 0.3768279552459717, + 0.4143814146518707, + 0.4519348442554474, + 0.48948830366134644, + 0.5270417332649231, + 0.5645951628684998, + 0.6021486520767212, + 0.6397020816802979, + 0.6772555112838745, + 0.7148089408874512, + 0.7523623704910278, + 0.7899158000946045, + 0.8274692296981812, + 0.8650227189064026, + 0.9025761485099792, + 0.9401295781135559, + 0.9776830077171326, + 1.0152363777160645, + 1.0527899265289307, + 1.0903433561325073 + ] + } + }, + "transformer.layers.22.4.ff.0.0.weight": { + "min": -0.4230613112449646, + "max": 0.41900894045829773, + "mean": 0.00031366912298835814, + "std": 0.043512988835573196, + "abs_mean": 0.03395019471645355, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 89.10926055908203, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 0, + 3, + 1, + 9, + 20, + 43, + 80, + 112, + 176, + 161, + 157, + 96, + 62, + 44, + 23, + 8, + 0, + 3, + 1 + ], + "bin_edges": [ + -0.17678457498550415, + -0.15967991948127747, + -0.14257526397705078, + -0.1254706084728241, + -0.10836594551801682, + -0.09126129001379013, + -0.07415662705898285, + -0.057051971554756165, + -0.03994731605052948, + -0.022842660546302795, + -0.005738005042076111, + 0.011366650462150574, + 0.028471320867538452, + 0.04557597637176514, + 0.06268063187599182, + 0.0797852873802185, + 0.09688994288444519, + 0.11399459838867188, + 0.13109925389289856, + 0.14820390939712524, + 0.16530856490135193 + ] + } + }, + "transformer.layers.22.4.ff.0.0.bias": { + "min": -0.21445079147815704, + "max": 0.17045123875141144, + "mean": -0.029427748173475266, + "std": 0.03184095025062561, + "abs_mean": 0.03383331745862961, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.774671792984009, + "elements": 4096, + "histogram": { + "counts": [ + 2, + 0, + 3, + 3, + 4, + 7, + 46, + 96, + 159, + 227, + 257, + 147, + 46, + 1, + 0, + 1, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.21445079147815704, + -0.1952056884765625, + -0.17596058547496796, + -0.1567154824733734, + -0.13747039437294006, + -0.11822528392076492, + -0.09898018836975098, + -0.07973508536815643, + -0.06048998236656189, + -0.041244879364967346, + -0.021999776363372803, + -0.0027546733617782593, + 0.01649041473865509, + 0.03573553264141083, + 0.05498062074184418, + 0.07422573864459991, + 0.09347082674503326, + 0.11271591484546661, + 0.13196103274822235, + 0.1512061208486557, + 0.17045123875141144 + ] + } + }, + "transformer.layers.22.4.ff.2.weight": { + "min": -0.5979012846946716, + "max": 0.559224545955658, + "mean": -0.00014804149395786226, + "std": 0.053461432456970215, + "abs_mean": 0.04055660963058472, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 109.4767837524414, + "elements": 4194304, + "histogram": { + "counts": [ + 2, + 0, + 0, + 4, + 3, + 20, + 39, + 65, + 102, + 175, + 205, + 155, + 102, + 63, + 35, + 14, + 11, + 2, + 0, + 3 + ], + "bin_edges": [ + -0.2224254310131073, + -0.20098626613616943, + -0.17954708635807037, + -0.1581079214811325, + -0.13666874170303345, + -0.11522957682609558, + -0.09379041194915771, + -0.07235123217105865, + -0.05091206729412079, + -0.029472902417182922, + -0.008033722639083862, + 0.013405442237854004, + 0.03484460711479187, + 0.056283771991729736, + 0.07772296667098999, + 0.09916213154792786, + 0.12060129642486572, + 0.1420404613018036, + 0.16347962617874146, + 0.1849188208580017, + 0.20635798573493958 + ] + } + }, + "transformer.layers.22.4.ff.2.bias": { + "min": -0.17863567173480988, + "max": 0.3767751455307007, + "mean": 0.0013495876919478178, + "std": 0.037288032472133636, + "abs_mean": 0.027615398168563843, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.1934159994125366, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 1, + 20, + 82, + 253, + 318, + 221, + 79, + 16, + 4, + 1, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.17863567173480988, + -0.15086513757705688, + -0.1230945885181427, + -0.09532405436038971, + -0.06755351275205612, + -0.039782971143722534, + -0.012012436985969543, + 0.01575811207294464, + 0.04352864623069763, + 0.07129918038845062, + 0.09906972944736481, + 0.126840278506279, + 0.1546107977628708, + 0.18238134682178497, + 0.21015189588069916, + 0.23792241513729095, + 0.26569294929504395, + 0.2934635281562805, + 0.3212340474128723, + 0.3490046262741089, + 0.3767751455307007 + ] + } + }, + "transformer.layers.23.0.weight": { + "min": -0.3942491412162781, + "max": 0.3687455952167511, + "mean": 3.7661615351680666e-05, + "std": 0.028617454692721367, + "abs_mean": 0.021983902901411057, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 41.44097900390625, + "elements": 2097152, + "histogram": { + "counts": [ + 2, + 0, + 3, + 6, + 8, + 23, + 49, + 78, + 127, + 146, + 161, + 161, + 103, + 55, + 37, + 24, + 8, + 4, + 1, + 4 + ], + "bin_edges": [ + -0.10764998942613602, + -0.09715531021356583, + -0.08666063845157623, + -0.07616595923900604, + -0.06567128002643585, + -0.05517660826444626, + -0.04468192905187607, + -0.034187257289886475, + -0.023692578077316284, + -0.013197898864746094, + -0.0027032271027565002, + 0.00779145210981369, + 0.01828613132238388, + 0.02878081053495407, + 0.03927547484636307, + 0.04977015405893326, + 0.06026483327150345, + 0.07075951248407364, + 0.08125419169664383, + 0.09174885600805283, + 0.10224352777004242 + ] + } + }, + "transformer.layers.23.1.g": { + "min": 0.2907008230686188, + "max": 0.8258129358291626, + "mean": 0.7054593563079834, + "std": 0.06773429363965988, + "abs_mean": 0.7054593563079834, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.678415298461914, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 2, + 2, + 4, + 7, + 1, + 9, + 8, + 16, + 17, + 22, + 27, + 52, + 110, + 185, + 261, + 219, + 49, + 8 + ], + "bin_edges": [ + 0.2907008230686188, + 0.3174564242362976, + 0.34421202540397644, + 0.3709676265716553, + 0.3977232575416565, + 0.4244788587093353, + 0.45123445987701416, + 0.4779900908470154, + 0.5047456622123718, + 0.531501293182373, + 0.5582568645477295, + 0.5850124955177307, + 0.6117681264877319, + 0.6385236978530884, + 0.6652793288230896, + 0.6920349597930908, + 0.7187905311584473, + 0.7455461025238037, + 0.7723017334938049, + 0.7990573644638062, + 0.8258129358291626 + ] + } + }, + "transformer.layers.23.2.to_q.weight": { + "min": -0.9265665411949158, + "max": 1.0269814729690552, + "mean": -2.791242877719924e-05, + "std": 0.04764382541179657, + "abs_mean": 0.03451031446456909, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 48.78652572631836, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 5, + 1, + 2, + 6, + 9, + 23, + 52, + 71, + 111, + 195, + 192, + 150, + 95, + 45, + 19, + 12, + 5, + 3, + 2 + ], + "bin_edges": [ + -0.21141943335533142, + -0.19226078689098358, + -0.17310214042663574, + -0.1539434790611267, + -0.13478483259677887, + -0.11562618613243103, + -0.0964675322175026, + -0.07730887830257416, + -0.05815023183822632, + -0.03899158537387848, + -0.01983293890953064, + -0.0006742775440216064, + 0.018484368920326233, + 0.03764301538467407, + 0.056801676750183105, + 0.07596030831336975, + 0.09511896967887878, + 0.11427763104438782, + 0.13343626260757446, + 0.1525949239730835, + 0.17175358533859253 + ] + } + }, + "transformer.layers.23.2.to_q.bias": { + "min": -0.8779393434524536, + "max": 0.8145599365234375, + "mean": -0.0002924790605902672, + "std": 0.09544122219085693, + "abs_mean": 0.05420161783695221, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 3.0526418685913086, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 0, + 1, + 4, + 7, + 7, + 31, + 196, + 594, + 134, + 12, + 3, + 5, + 1, + 3, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.8779393434524536, + -0.7933143973350525, + -0.7086894512176514, + -0.6240644454956055, + -0.5394394993782043, + -0.4548145532608032, + -0.3701895475387573, + -0.2855646014213562, + -0.20093965530395508, + -0.11631470918655396, + -0.03168976306915283, + 0.052935242652893066, + 0.13756024837493896, + 0.2221851348876953, + 0.3068101406097412, + 0.39143502712249756, + 0.47606003284454346, + 0.5606850385665894, + 0.6453099250793457, + 0.7299349308013916, + 0.8145599365234375 + ] + } + }, + "transformer.layers.23.2.to_k.weight": { + "min": -0.27007606625556946, + "max": 0.24068056046962738, + "mean": -2.2448431991506368e-05, + "std": 0.038949914276599884, + "abs_mean": 0.030576692894101143, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.88426208496094, + "elements": 1048576, + "histogram": { + "counts": [ + 7, + 16, + 12, + 45, + 55, + 104, + 128, + 135, + 131, + 129, + 76, + 62, + 52, + 18, + 16, + 10, + 2, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.10851769894361496, + -0.09493239969015121, + -0.08134710043668747, + -0.06776180118322372, + -0.05417650192975998, + -0.040591202676296234, + -0.02700590342283249, + -0.013420604169368744, + 0.00016469508409500122, + 0.013749994337558746, + 0.02733529359102249, + 0.04092059284448624, + 0.05450589209794998, + 0.06809119135141373, + 0.08167649060487747, + 0.09526178985834122, + 0.10884708911180496, + 0.12243238836526871, + 0.13601768016815186, + 0.1496029794216156, + 0.16318829357624054 + ] + } + }, + "transformer.layers.23.2.to_k.bias": { + "min": -23.705463409423828, + "max": 22.81535530090332, + "mean": -0.09178592264652252, + "std": 4.064526081085205, + "abs_mean": 1.7296252250671387, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 130.03448486328125, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 8, + 5, + 6, + 5, + 13, + 10, + 22, + 235, + 610, + 32, + 9, + 5, + 11, + 7, + 4, + 4, + 4, + 3, + 3 + ], + "bin_edges": [ + -19.21725845336914, + -17.11562728881836, + -15.013997077941895, + -12.91236686706543, + -10.810735702514648, + -8.709104537963867, + -6.607474327087402, + -4.5058441162109375, + -2.4042129516601562, + -0.302581787109375, + 1.7990493774414062, + 3.9006786346435547, + 6.002309799194336, + 8.103940963745117, + 10.205570220947266, + 12.307201385498047, + 14.408832550048828, + 16.51046371459961, + 18.61209487915039, + 20.713726043701172, + 22.81535530090332 + ] + } + }, + "transformer.layers.23.2.to_v.weight": { + "min": -0.2275296449661255, + "max": 0.2455320507287979, + "mean": -2.5536401153658517e-05, + "std": 0.03864150494337082, + "abs_mean": 0.030268080532550812, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 39.56838607788086, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 2, + 6, + 13, + 27, + 41, + 67, + 108, + 126, + 126, + 141, + 112, + 88, + 59, + 38, + 18, + 9, + 7, + 6, + 3 + ], + "bin_edges": [ + -0.1285347193479538, + -0.11553225666284561, + -0.10252979397773743, + -0.08952733129262924, + -0.07652486860752106, + -0.06352240592241287, + -0.05051994323730469, + -0.0375174805521965, + -0.024515017867088318, + -0.011512555181980133, + 0.0014899075031280518, + 0.014492377638816833, + 0.02749483287334442, + 0.04049728810787201, + 0.05349975824356079, + 0.06650222837924957, + 0.07950468361377716, + 0.09250713884830475, + 0.10550960898399353, + 0.11851207911968231, + 0.1315145343542099 + ] + } + }, + "transformer.layers.23.2.to_v.bias": { + "min": -0.06007588282227516, + "max": 0.045354753732681274, + "mean": -0.00013596308417618275, + "std": 0.014683394692838192, + "abs_mean": 0.012035916559398174, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4696592688560486, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 0, + 0, + 1, + 7, + 15, + 29, + 66, + 89, + 133, + 106, + 132, + 117, + 118, + 94, + 57, + 22, + 7, + 3, + 3 + ], + "bin_edges": [ + -0.06007588282227516, + -0.054804351180791855, + -0.04953281953930855, + -0.04426128789782524, + -0.038989756256341934, + -0.03371822461485863, + -0.02844669297337532, + -0.023175161331892014, + -0.017903629690408707, + -0.0126320980489254, + -0.007360566407442093, + -0.002089034765958786, + 0.003182496875524521, + 0.008454028517007828, + 0.013725560158491135, + 0.01899709179997444, + 0.02426862344145775, + 0.029540155082941055, + 0.03481168672442436, + 0.04008321836590767, + 0.045354753732681274 + ] + } + }, + "transformer.layers.23.2.to_out.0.weight": { + "min": -0.33782336115837097, + "max": 0.3746013939380646, + "mean": 7.420163456117734e-06, + "std": 0.04082043468952179, + "abs_mean": 0.03092447854578495, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 41.79957580566406, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 3, + 3, + 5, + 19, + 26, + 57, + 100, + 158, + 164, + 198, + 138, + 56, + 43, + 15, + 6, + 1, + 3, + 2, + 1 + ], + "bin_edges": [ + -0.1686936765909195, + -0.1513969749212265, + -0.13410025835037231, + -0.11680355668067932, + -0.09950684756040573, + -0.08221013844013214, + -0.06491343677043915, + -0.04761672765016556, + -0.030320018529891968, + -0.013023316860198975, + 0.004273399710655212, + 0.021570101380348206, + 0.0388668030500412, + 0.056163519620895386, + 0.07346022129058838, + 0.09075693786144257, + 0.10805363953113556, + 0.12535034120082855, + 0.14264704287052155, + 0.15994377434253693, + 0.1772404909133911 + ] + } + }, + "transformer.layers.23.2.to_out.0.bias": { + "min": -0.046125710010528564, + "max": 0.19506430625915527, + "mean": 0.0002738517359830439, + "std": 0.013541821390390396, + "abs_mean": 0.009806378744542599, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.43321529030799866, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 34, + 168, + 349, + 326, + 103, + 14, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.046125710010528564, + -0.03406620770692825, + -0.02200670912861824, + -0.009947210550308228, + 0.0021122917532920837, + 0.014171794056892395, + 0.02623128890991211, + 0.03829079121351242, + 0.05035029351711273, + 0.06240979582071304, + 0.07446929812431335, + 0.08652879297733307, + 0.09858828783035278, + 0.11064779758453369, + 0.1227072924375534, + 0.13476680219173431, + 0.14682629704475403, + 0.15888579189777374, + 0.17094530165195465, + 0.18300479650497437, + 0.19506430625915527 + ] + } + }, + "transformer.layers.23.3.g": { + "min": 0.373764306306839, + "max": 1.1280238628387451, + "mean": 0.8901123404502869, + "std": 0.06384868174791336, + "abs_mean": 0.8901123404502869, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 28.556705474853516, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 0, + 0, + 0, + 3, + 4, + 3, + 6, + 9, + 14, + 15, + 26, + 96, + 327, + 377, + 101, + 12, + 1, + 3, + 1 + ], + "bin_edges": [ + 0.373764306306839, + 0.41147729754447937, + 0.44919025897979736, + 0.48690325021743774, + 0.5246162414550781, + 0.5623291730880737, + 0.6000422239303589, + 0.6377551555633545, + 0.6754681468009949, + 0.7131811380386353, + 0.7508940696716309, + 0.788607120513916, + 0.8263200521469116, + 0.864033043384552, + 0.9017460346221924, + 0.939458966255188, + 0.9771720170974731, + 1.0148849487304688, + 1.052597999572754, + 1.0903109312057495, + 1.1280238628387451 + ] + } + }, + "transformer.layers.23.4.ff.0.0.weight": { + "min": -0.44741326570510864, + "max": 0.5422499775886536, + "mean": 2.5218110749847256e-05, + "std": 0.045580700039863586, + "abs_mean": 0.03508353605866432, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 93.34017944335938, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 2, + 0, + 2, + 17, + 56, + 146, + 252, + 261, + 164, + 73, + 20, + 5, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.23515397310256958, + -0.20598602294921875, + -0.17681807279586792, + -0.1476501226425171, + -0.11848217248916626, + -0.08931422233581543, + -0.0601462721824646, + -0.03097832202911377, + -0.0018103718757629395, + 0.02735757827758789, + 0.05652552843093872, + 0.08569347858428955, + 0.11486142873764038, + 0.1440293788909912, + 0.17319732904434204, + 0.20236527919769287, + 0.2315332293510437, + 0.26070117950439453, + 0.28986912965774536, + 0.3190370798110962, + 0.34820500016212463 + ] + } + }, + "transformer.layers.23.4.ff.0.0.bias": { + "min": -0.22342835366725922, + "max": 0.08723597973585129, + "mean": -0.03199537843465805, + "std": 0.03770318627357483, + "abs_mean": 0.03751157969236374, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 3.1645314693450928, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 2, + 4, + 2, + 5, + 5, + 8, + 9, + 14, + 30, + 64, + 111, + 129, + 143, + 153, + 162, + 89, + 53, + 14, + 2 + ], + "bin_edges": [ + -0.21711575984954834, + -0.20336702466011047, + -0.1896182745695114, + -0.17586952447891235, + -0.1621207892894745, + -0.14837205410003662, + -0.13462330400943756, + -0.1208745613694191, + -0.10712581872940063, + -0.09337707608938217, + -0.07962833344936371, + -0.06587958335876465, + -0.05213084816932678, + -0.038382112979888916, + -0.024633362889289856, + -0.010884612798690796, + 0.0028641223907470703, + 0.016612857580184937, + 0.030361607670783997, + 0.04411035776138306, + 0.057859089225530624 + ] + } + }, + "transformer.layers.23.4.ff.2.weight": { + "min": -0.7254156470298767, + "max": 0.6879446506500244, + "mean": 3.628328340710141e-05, + "std": 0.05179440602660179, + "abs_mean": 0.03891872242093086, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 106.06195068359375, + "elements": 4194304, + "histogram": { + "counts": [ + 4, + 3, + 10, + 28, + 65, + 177, + 270, + 222, + 131, + 50, + 24, + 8, + 4, + 3, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.20179718732833862, + -0.17253395915031433, + -0.14327071607112885, + -0.11400748789310455, + -0.08474425226449966, + -0.055481016635894775, + -0.026217788457870483, + 0.0030454546213150024, + 0.032308682799339294, + 0.06157192587852478, + 0.09083515405654907, + 0.12009838223457336, + 0.14936161041259766, + 0.17862483859062195, + 0.20788809657096863, + 0.23715132474899292, + 0.2664145529270172, + 0.2956777811050415, + 0.3249410390853882, + 0.3542042374610901, + 0.3834674656391144 + ] + } + }, + "transformer.layers.23.4.ff.2.bias": { + "min": -0.174102783203125, + "max": 0.2178839147090912, + "mean": 3.535003634169698e-05, + "std": 0.03175075352191925, + "abs_mean": 0.023612717166543007, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.0155285596847534, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 2, + 3, + 2, + 20, + 64, + 183, + 253, + 268, + 131, + 48, + 16, + 4, + 0, + 2, + 0, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.174102783203125, + -0.1545034497976303, + -0.13490411639213562, + -0.11530477553606033, + -0.09570544213056564, + -0.07610610872507095, + -0.056506767868995667, + -0.03690743446350098, + -0.017308101058006287, + 0.0022912323474884033, + 0.021890565752983093, + 0.04148989915847778, + 0.06108924746513367, + 0.08068856596946716, + 0.10028791427612305, + 0.11988723278045654, + 0.13948658108711243, + 0.1590859293937683, + 0.1786852478981018, + 0.1982845962047577, + 0.2178839147090912 + ] + } + }, + "transformer.layers.24.0.weight": { + "min": -0.33916032314300537, + "max": 0.37271323800086975, + "mean": 4.308380448492244e-05, + "std": 0.034135378897190094, + "abs_mean": 0.025202713906764984, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 49.43122863769531, + "elements": 2097152, + "histogram": { + "counts": [ + 3, + 3, + 3, + 11, + 20, + 36, + 73, + 135, + 221, + 198, + 138, + 82, + 35, + 20, + 10, + 5, + 1, + 2, + 1, + 3 + ], + "bin_edges": [ + -0.13991793990135193, + -0.12436307221651077, + -0.10880820453166962, + -0.09325334429740906, + -0.0776984766125679, + -0.062143608927726746, + -0.046588748693466187, + -0.03103388100862503, + -0.015479013323783875, + 7.584691047668457e-05, + 0.015630722045898438, + 0.031185582280158997, + 0.046740442514419556, + 0.06229531764984131, + 0.07785017788410187, + 0.09340505301952362, + 0.10895991325378418, + 0.12451478838920593, + 0.1400696337223053, + 0.15562450885772705, + 0.1711793839931488 + ] + } + }, + "transformer.layers.24.1.g": { + "min": 0.3176645338535309, + "max": 1.2846463918685913, + "mean": 0.6014195084571838, + "std": 0.08323279023170471, + "abs_mean": 0.6014195084571838, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 19.42867660522461, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 19, + 36, + 45, + 96, + 322, + 316, + 112, + 29, + 5, + 4, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 0, + 1 + ], + "bin_edges": [ + 0.3176645338535309, + 0.36601361632347107, + 0.41436272859573364, + 0.46271181106567383, + 0.511060893535614, + 0.5594099760055542, + 0.6077591180801392, + 0.6561082005500793, + 0.7044572830200195, + 0.7528063654899597, + 0.8011554479598999, + 0.8495045900344849, + 0.8978536128997803, + 0.9462027549743652, + 0.9945518970489502, + 1.0429009199142456, + 1.0912500619888306, + 1.139599084854126, + 1.187948226928711, + 1.236297369003296, + 1.2846463918685913 + ] + } + }, + "transformer.layers.24.2.to_q.weight": { + "min": -0.2829808294773102, + "max": 0.26017650961875916, + "mean": -3.0644375783595024e-06, + "std": 0.035980723798274994, + "abs_mean": 0.027858424931764603, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 36.84376525878906, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 1, + 3, + 9, + 14, + 32, + 59, + 84, + 121, + 163, + 175, + 128, + 100, + 48, + 34, + 14, + 5, + 3, + 2, + 3 + ], + "bin_edges": [ + -0.135683074593544, + -0.12228070199489594, + -0.10887832939624786, + -0.09547595679759979, + -0.08207358419895172, + -0.06867121160030365, + -0.05526883900165558, + -0.04186646640300751, + -0.028464093804359436, + -0.015061721205711365, + -0.0016593486070632935, + 0.011743023991584778, + 0.02514539659023285, + 0.03854776918888092, + 0.05195014178752899, + 0.06535251438617706, + 0.07875488698482513, + 0.0921572595834732, + 0.10555963218212128, + 0.11896200478076935, + 0.13236436247825623 + ] + } + }, + "transformer.layers.24.2.to_q.bias": { + "min": -0.23540745675563812, + "max": 0.20547473430633545, + "mean": 0.0002399118966422975, + "std": 0.056001532822847366, + "abs_mean": 0.043137334287166595, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.791190266609192, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 2, + 2, + 12, + 19, + 32, + 67, + 101, + 146, + 172, + 164, + 112, + 80, + 42, + 28, + 11, + 6, + 1, + 1 + ], + "bin_edges": [ + -0.23540745675563812, + -0.21336334943771362, + -0.19131922721862793, + -0.16927511990070343, + -0.14723101258277893, + -0.12518690526485443, + -0.10314278304576874, + -0.08109867572784424, + -0.05905456840991974, + -0.03701046109199524, + -0.01496635377407074, + 0.007077768445014954, + 0.029121890664100647, + 0.05116598308086395, + 0.07321010529994965, + 0.09525419771671295, + 0.11729831993579865, + 0.13934244215488434, + 0.16138653457164764, + 0.18343065679073334, + 0.20547473430633545 + ] + } + }, + "transformer.layers.24.2.to_k.weight": { + "min": -0.43518391251564026, + "max": 0.32444700598716736, + "mean": 2.422756006126292e-05, + "std": 0.03412417694926262, + "abs_mean": 0.026380151510238647, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 34.94275665283203, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 2, + 10, + 24, + 47, + 63, + 120, + 180, + 193, + 168, + 93, + 50, + 22, + 16, + 2, + 5, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.11856266856193542, + -0.10398302227258682, + -0.08940338343381882, + -0.07482373714447021, + -0.06024409458041191, + -0.04566445201635361, + -0.031084805727005005, + -0.016505166888237, + -0.0019255205988883972, + 0.012654125690460205, + 0.02723376452922821, + 0.041813403367996216, + 0.056393057107925415, + 0.07097269594669342, + 0.08555233478546143, + 0.10013198852539062, + 0.11471162736415863, + 0.12929126620292664, + 0.14387091994285583, + 0.15845054388046265, + 0.17303019762039185 + ] + } + }, + "transformer.layers.24.2.to_k.bias": { + "min": -5.537700176239014, + "max": 7.30228853225708, + "mean": -0.007349951192736626, + "std": 0.6983441114425659, + "abs_mean": 0.2657066583633423, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 22.33733558654785, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 2, + 3, + 8, + 9, + 5, + 23, + 847, + 71, + 12, + 8, + 3, + 3, + 0, + 0, + 0, + 1, + 0, + 2 + ], + "bin_edges": [ + -5.537700176239014, + -4.895700931549072, + -4.253701210021973, + -3.6117019653320312, + -2.9697024822235107, + -2.3277029991149902, + -1.6857037544250488, + -1.0437040328979492, + -0.4017047882080078, + 0.2402944564819336, + 0.8822941780090332, + 1.5242934226989746, + 2.166292667388916, + 2.8082919120788574, + 3.4502921104431152, + 4.092291355133057, + 4.734290599822998, + 5.3762898445129395, + 6.018289089202881, + 6.660289287567139, + 7.30228853225708 + ] + } + }, + "transformer.layers.24.2.to_v.weight": { + "min": -0.34386035799980164, + "max": 0.3621582090854645, + "mean": 0.00010323335300199687, + "std": 0.04783642664551735, + "abs_mean": 0.03668797016143799, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 48.983909606933594, + "elements": 1048576, + "histogram": { + "counts": [ + 3, + 4, + 1, + 8, + 15, + 35, + 56, + 107, + 168, + 195, + 164, + 102, + 70, + 37, + 14, + 15, + 3, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.18854962289333344, + -0.16854910552501678, + -0.14854860305786133, + -0.12854808568954468, + -0.10854757577180862, + -0.08854706585407257, + -0.06854654848575592, + -0.048546046018600464, + -0.028545528650283813, + -0.008545011281967163, + 0.011455491185188293, + 0.031456008553504944, + 0.051456525921821594, + 0.07145704329013824, + 0.09145753085613251, + 0.11145804822444916, + 0.1314585655927658, + 0.15145908296108246, + 0.1714596003293991, + 0.19146008789539337, + 0.21146059036254883 + ] + } + }, + "transformer.layers.24.2.to_v.bias": { + "min": -0.07365774363279343, + "max": 0.060269735753536224, + "mean": 0.0009362755226902664, + "std": 0.014931198209524155, + "abs_mean": 0.012213543988764286, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.47850388288497925, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 2, + 2, + 0, + 0, + 1, + 7, + 52, + 124, + 144, + 140, + 145, + 164, + 135, + 60, + 13, + 4, + 3, + 0, + 3 + ], + "bin_edges": [ + -0.07365774363279343, + -0.066961370408535, + -0.06026499718427658, + -0.05356862023472786, + -0.04687224701046944, + -0.040175873786211014, + -0.03347949683666229, + -0.02678312361240387, + -0.020086750388145447, + -0.013390377163887024, + -0.006694003939628601, + 2.3692846298217773e-06, + 0.0066987499594688416, + 0.013395123183727264, + 0.020091496407985687, + 0.02678786963224411, + 0.03348424285650253, + 0.040180616080760956, + 0.04687698930501938, + 0.0535733625292778, + 0.060269735753536224 + ] + } + }, + "transformer.layers.24.2.to_out.0.weight": { + "min": -0.2561565041542053, + "max": 0.2865042984485626, + "mean": 4.9739428504835814e-06, + "std": 0.04156460985541344, + "abs_mean": 0.03219493851065636, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 42.561676025390625, + "elements": 1048576, + "histogram": { + "counts": [ + 4, + 2, + 9, + 11, + 24, + 43, + 56, + 77, + 109, + 123, + 150, + 116, + 94, + 72, + 41, + 43, + 12, + 7, + 5, + 2 + ], + "bin_edges": [ + -0.12750108540058136, + -0.11490428447723389, + -0.10230748355388641, + -0.08971068263053894, + -0.07711388170719147, + -0.064517080783844, + -0.05192027986049652, + -0.03932347893714905, + -0.026726678013801575, + -0.014129877090454102, + -0.0015330761671066284, + 0.011063724756240845, + 0.023660525679588318, + 0.03625732660293579, + 0.048854127526283264, + 0.06145092844963074, + 0.07404772937297821, + 0.08664453029632568, + 0.09924133121967316, + 0.11183813214302063, + 0.12443491816520691 + ] + } + }, + "transformer.layers.24.2.to_out.0.bias": { + "min": -0.055231235921382904, + "max": 0.06271004676818848, + "mean": 0.00012724015687126666, + "std": 0.0071450709365308285, + "abs_mean": 0.0050282771699130535, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.22856685519218445, + "elements": 1024, + "histogram": { + "counts": [ + 1, + 1, + 0, + 1, + 2, + 3, + 10, + 61, + 279, + 367, + 219, + 45, + 7, + 1, + 2, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.055231235921382904, + -0.049334172159433365, + -0.043437108397483826, + -0.03754004091024399, + -0.03164298087358475, + -0.02574591524899006, + -0.01984884962439537, + -0.013951785862445831, + -0.008054722100496292, + -0.002157658338546753, + 0.0037394054234027863, + 0.009636469185352325, + 0.015533536672592163, + 0.021430596709251404, + 0.02732766419649124, + 0.03322472423315048, + 0.03912179172039032, + 0.04501885920763016, + 0.0509159192442894, + 0.056812986731529236, + 0.06271004676818848 + ] + } + }, + "transformer.layers.24.3.g": { + "min": 0.49412763118743896, + "max": 1.2182179689407349, + "mean": 1.0133787393569946, + "std": 0.11725164949893951, + "abs_mean": 1.0133787393569946, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 32.644248962402344, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 1, + 1, + 4, + 4, + 8, + 16, + 23, + 25, + 34, + 38, + 35, + 44, + 85, + 136, + 190, + 200, + 101, + 43, + 8 + ], + "bin_edges": [ + 0.49412763118743896, + 0.5303321480751038, + 0.5665366649627686, + 0.6027411818504333, + 0.6389456987380981, + 0.6751502156257629, + 0.7113547325134277, + 0.7475592494010925, + 0.7837637662887573, + 0.8199682831764221, + 0.8561728000640869, + 0.8923773169517517, + 0.9285818338394165, + 0.9647863507270813, + 1.000990867614746, + 1.0371954441070557, + 1.0733999013900757, + 1.1096043586730957, + 1.1458089351654053, + 1.1820135116577148, + 1.2182179689407349 + ] + } + }, + "transformer.layers.24.4.ff.0.0.weight": { + "min": -1.0939558744430542, + "max": 1.0474863052368164, + "mean": -4.8846173740457743e-05, + "std": 0.052417904138565063, + "abs_mean": 0.03914271295070648, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 107.33837127685547, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 2, + 0, + 0, + 2, + 3, + 6, + 13, + 41, + 76, + 113, + 190, + 179, + 154, + 120, + 62, + 22, + 10, + 1, + 5 + ], + "bin_edges": [ + -0.27871084213256836, + -0.2560090720653534, + -0.23330731689929962, + -0.21060556173324585, + -0.18790379166603088, + -0.1652020364999771, + -0.14250028133392334, + -0.11979851126670837, + -0.0970967561006546, + -0.07439500093460083, + -0.051693230867385864, + -0.028991475701332092, + -0.00628972053527832, + 0.016412049531936646, + 0.03911381959915161, + 0.06181555986404419, + 0.08451732993125916, + 0.10721909999847412, + 0.1299208402633667, + 0.15262261033058167, + 0.17532438039779663 + ] + } + }, + "transformer.layers.24.4.ff.0.0.bias": { + "min": -0.22328178584575653, + "max": 0.172784686088562, + "mean": -0.02721056528389454, + "std": 0.0362662672996521, + "abs_mean": 0.03248982131481171, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 2.9014923572540283, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 4, + 2, + 2, + 1, + 9, + 4, + 23, + 19, + 29, + 48, + 109, + 177, + 239, + 193, + 95, + 29, + 11, + 2, + 3 + ], + "bin_edges": [ + -0.22328178584575653, + -0.20809650421142578, + -0.19291120767593384, + -0.1777259260416031, + -0.16254064440727234, + -0.1473553478717804, + -0.13217006623744965, + -0.1169847846031189, + -0.10179949551820755, + -0.0866142064332962, + -0.07142892479896545, + -0.056243643164634705, + -0.04105834662914276, + -0.02587306499481201, + -0.010687783360481262, + 0.004497513175010681, + 0.01968279480934143, + 0.03486807644367218, + 0.050053372979164124, + 0.06523863971233368, + 0.08042393624782562 + ] + } + }, + "transformer.layers.24.4.ff.2.weight": { + "min": -0.8832080960273743, + "max": 0.9217195510864258, + "mean": -0.00014604278840124607, + "std": 0.05329865962266922, + "abs_mean": 0.03896614536643028, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 109.14219665527344, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 2, + 4, + 7, + 22, + 43, + 97, + 163, + 233, + 197, + 101, + 63, + 35, + 14, + 8, + 5, + 4, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.20571710169315338, + -0.18189872801303864, + -0.15808036923408508, + -0.13426199555397034, + -0.11044362187385559, + -0.08662524819374084, + -0.06280688941478729, + -0.038988515734672546, + -0.0151701420545578, + 0.008648231625556946, + 0.03246660530567169, + 0.05628497898578644, + 0.0801033228635788, + 0.10392169654369354, + 0.1277400702238083, + 0.15155844390392303, + 0.17537681758403778, + 0.19919519126415253, + 0.22301356494426727, + 0.24683193862438202, + 0.27065032720565796 + ] + } + }, + "transformer.layers.24.4.ff.2.bias": { + "min": -0.1707809567451477, + "max": 0.3790228068828583, + "mean": 0.003364440519362688, + "std": 0.03984135016798973, + "abs_mean": 0.02741703949868679, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 1.27884042263031, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 0, + 6, + 23, + 106, + 261, + 336, + 194, + 47, + 8, + 9, + 0, + 1, + 2, + 2, + 0, + 0, + 1, + 0, + 1 + ], + "bin_edges": [ + -0.1707809567451477, + -0.14329077303409576, + -0.11580058187246323, + -0.08831039071083069, + -0.06082020699977875, + -0.03333002328872681, + -0.005839824676513672, + 0.02165035903453827, + 0.04914054274559021, + 0.07663072645664215, + 0.10412091016769409, + 0.13161110877990723, + 0.15910130739212036, + 0.1865914762020111, + 0.21408167481422424, + 0.241571843624115, + 0.2690620422363281, + 0.29655224084854126, + 0.324042409658432, + 0.35153257846832275, + 0.3790228068828583 + ] + } + }, + "transformer.layers.25.0.weight": { + "min": -0.7773804068565369, + "max": 0.7221406698226929, + "mean": 1.8065227777697146e-05, + "std": 0.04615423083305359, + "abs_mean": 0.03191829100251198, + "sparsity": 0.0, + "shape": [ + 1024, + 2048 + ], + "norm": 66.83527374267578, + "elements": 2097152, + "histogram": { + "counts": [ + 2, + 1, + 1, + 1, + 2, + 4, + 5, + 18, + 47, + 124, + 255, + 281, + 150, + 51, + 28, + 15, + 10, + 3, + 1, + 1 + ], + "bin_edges": [ + -0.28086602687835693, + -0.2556462585926056, + -0.23042649030685425, + -0.2052067071199417, + -0.17998693883419037, + -0.15476717054843903, + -0.1295473873615265, + -0.10432761907577515, + -0.0791078507900238, + -0.05388808250427246, + -0.028668314218521118, + -0.0034485459327697754, + 0.021771252155303955, + 0.0469910204410553, + 0.07221078872680664, + 0.09743055701255798, + 0.12265032529830933, + 0.14787009358406067, + 0.173089861869812, + 0.19830963015556335, + 0.2235294133424759 + ] + } + }, + "transformer.layers.25.1.g": { + "min": 0.33866187930107117, + "max": 1.425328254699707, + "mean": 0.9481796622276306, + "std": 0.20640140771865845, + "abs_mean": 0.9481796622276306, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 31.051620483398438, + "elements": 1024, + "histogram": { + "counts": [ + 5, + 9, + 9, + 26, + 44, + 41, + 29, + 40, + 48, + 61, + 72, + 91, + 134, + 120, + 135, + 83, + 44, + 6, + 2, + 1 + ], + "bin_edges": [ + 0.33866187930107117, + 0.3929952085018158, + 0.44732850790023804, + 0.5016618371009827, + 0.5559951663017273, + 0.6103284358978271, + 0.6646617650985718, + 0.7189950942993164, + 0.773328423500061, + 0.8276617527008057, + 0.8819950819015503, + 0.9363284111022949, + 0.9906617403030396, + 1.0449950695037842, + 1.0993282794952393, + 1.1536616086959839, + 1.2079949378967285, + 1.2623282670974731, + 1.3166615962982178, + 1.3709949254989624, + 1.425328254699707 + ] + } + }, + "transformer.layers.25.2.to_q.weight": { + "min": -1.7458044290542603, + "max": 1.704500436782837, + "mean": 0.00022708994220010936, + "std": 0.15870554745197296, + "abs_mean": 0.0743621215224266, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 162.51336669921875, + "elements": 1048576, + "histogram": { + "counts": [ + 2, + 4, + 6, + 6, + 9, + 11, + 14, + 150, + 679, + 56, + 13, + 20, + 12, + 8, + 3, + 2, + 1, + 0, + 2, + 2 + ], + "bin_edges": [ + -0.9227117896080017, + -0.81264328956604, + -0.7025748491287231, + -0.5925063490867615, + -0.4824378490447998, + -0.37236934900283813, + -0.26230090856552124, + -0.15223240852355957, + -0.0421639084815979, + 0.06790459156036377, + 0.17797309160232544, + 0.28804153203964233, + 0.39810997247695923, + 0.5081785321235657, + 0.6182469725608826, + 0.728315532207489, + 0.8383839726448059, + 0.9484524130821228, + 1.058521032333374, + 1.1685893535614014, + 1.2786579132080078 + ] + } + }, + "transformer.layers.25.2.to_q.bias": { + "min": -1.19757080078125, + "max": 1.0991984605789185, + "mean": -0.009535851888358593, + "std": 0.2035919725894928, + "abs_mean": 0.08614380657672882, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 6.518906593322754, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 1, + 6, + 6, + 8, + 15, + 8, + 9, + 12, + 66, + 780, + 35, + 9, + 10, + 12, + 9, + 5, + 2, + 1, + 4 + ], + "bin_edges": [ + -1.19757080078125, + -1.0827323198318481, + -0.9678938984870911, + -0.8530554175376892, + -0.7382169961929321, + -0.6233785152435303, + -0.5085400342941284, + -0.39370161294937134, + -0.2788631319999695, + -0.1640247106552124, + -0.04918622970581055, + 0.06565225124359131, + 0.18049073219299316, + 0.295329213142395, + 0.4101675748825073, + 0.5250060558319092, + 0.639844536781311, + 0.7546830177307129, + 0.8695213794708252, + 0.9843599796295166, + 1.0991984605789185 + ] + } + }, + "transformer.layers.25.2.to_k.weight": { + "min": -0.4207988381385803, + "max": 0.4279989004135132, + "mean": 6.386132736224681e-05, + "std": 0.04802023991942406, + "abs_mean": 0.03564126417040825, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 49.1722412109375, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 1, + 1, + 3, + 2, + 8, + 19, + 11, + 36, + 96, + 134, + 196, + 213, + 135, + 78, + 38, + 11, + 11, + 2, + 4 + ], + "bin_edges": [ + -0.24734075367450714, + -0.22657868266105652, + -0.2058166116476059, + -0.18505454063415527, + -0.16429248452186584, + -0.14353039860725403, + -0.1227683424949646, + -0.10200627148151398, + -0.08124420046806335, + -0.06048212945461273, + -0.03972005844116211, + -0.018957987427711487, + 0.001804068684577942, + 0.02256615459918976, + 0.04332821071147919, + 0.064090296626091, + 0.08485235273838043, + 0.10561440885066986, + 0.12637649476528168, + 0.1471385508775711, + 0.16790063679218292 + ] + } + }, + "transformer.layers.25.2.to_k.bias": { + "min": -19.71625328063965, + "max": 19.51169776916504, + "mean": -0.24800625443458557, + "std": 4.769559860229492, + "abs_mean": 1.9953224658966064, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 152.75767517089844, + "elements": 1024, + "histogram": { + "counts": [ + 7, + 7, + 15, + 13, + 9, + 12, + 5, + 1, + 7, + 355, + 506, + 9, + 0, + 1, + 9, + 8, + 14, + 8, + 8, + 6 + ], + "bin_edges": [ + -19.71625328063965, + -17.75485610961914, + -15.793457984924316, + -13.832060813903809, + -11.870662689208984, + -9.909265518188477, + -7.947868347167969, + -5.9864702224731445, + -4.025073051452637, + -2.0636749267578125, + -0.10227775573730469, + 1.8591194152832031, + 3.820516586303711, + 5.781913757324219, + 7.743312835693359, + 9.704710006713867, + 11.666107177734375, + 13.627504348754883, + 15.588903427124023, + 17.5502986907959, + 19.51169776916504 + ] + } + }, + "transformer.layers.25.2.to_v.weight": { + "min": -0.3236338496208191, + "max": 0.438272625207901, + "mean": -1.1853735486511141e-05, + "std": 0.04616710543632507, + "abs_mean": 0.03471195697784424, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 47.274478912353516, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 2, + 7, + 8, + 26, + 35, + 69, + 148, + 207, + 219, + 115, + 78, + 48, + 18, + 11, + 5, + 0, + 1, + 0, + 2 + ], + "bin_edges": [ + -0.17851462960243225, + -0.15869757533073425, + -0.13888052105903625, + -0.11906348168849945, + -0.09924642741680145, + -0.07942937314510345, + -0.05961232632398605, + -0.03979527950286865, + -0.019978225231170654, + -0.00016117095947265625, + 0.019655883312225342, + 0.039472922682762146, + 0.059289976954460144, + 0.07910701632499695, + 0.09892407059669495, + 0.11874112486839294, + 0.13855817914009094, + 0.15837523341178894, + 0.17819228768348694, + 0.19800934195518494, + 0.21782641112804413 + ] + } + }, + "transformer.layers.25.2.to_v.bias": { + "min": -0.03371698036789894, + "max": 0.03678824380040169, + "mean": 0.0006397695397026837, + "std": 0.0129077835008502, + "abs_mean": 0.010960067622363567, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.41335463523864746, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 6, + 12, + 33, + 52, + 72, + 78, + 88, + 88, + 86, + 71, + 93, + 76, + 102, + 84, + 38, + 5, + 7, + 4, + 1 + ], + "bin_edges": [ + -0.03371698036789894, + -0.03019171953201294, + -0.026666458696126938, + -0.023141195997595787, + -0.019615935161709785, + -0.016090674325823784, + -0.012565411627292633, + -0.009040150791406631, + -0.00551488995552063, + -0.0019896291196346283, + 0.0015356317162513733, + 0.005060892552137375, + 0.008586157113313675, + 0.012111417949199677, + 0.015636678785085678, + 0.01916193962097168, + 0.02268720045685768, + 0.026212461292743683, + 0.029737722128629684, + 0.033262986689805984, + 0.03678824380040169 + ] + } + }, + "transformer.layers.25.2.to_out.0.weight": { + "min": -0.7031863331794739, + "max": 0.6687424182891846, + "mean": 4.257483305991627e-05, + "std": 0.057892125099897385, + "abs_mean": 0.039218515157699585, + "sparsity": 0.0, + "shape": [ + 1024, + 1024 + ], + "norm": 59.28053283691406, + "elements": 1048576, + "histogram": { + "counts": [ + 1, + 12, + 9, + 18, + 31, + 51, + 104, + 206, + 272, + 158, + 63, + 32, + 18, + 15, + 5, + 1, + 1, + 1, + 1, + 1 + ], + "bin_edges": [ + -0.21166379749774933, + -0.18592607975006104, + -0.16018837690353394, + -0.13445065915584564, + -0.10871294140815735, + -0.08297522366046906, + -0.057237520813941956, + -0.03149980306625366, + -0.005762085318565369, + 0.019975632429122925, + 0.04571335017681122, + 0.07145105302333832, + 0.09718875586986542, + 0.1229264885187149, + 0.148664191365242, + 0.1744019240140915, + 0.2001396268606186, + 0.2258773297071457, + 0.251615047454834, + 0.2773527503013611, + 0.30309048295021057 + ] + } + }, + "transformer.layers.25.2.to_out.0.bias": { + "min": -0.0722241997718811, + "max": 0.0676589161157608, + "mean": -0.0001341316383332014, + "std": 0.012878631241619587, + "abs_mean": 0.008325816132128239, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 0.4119373559951782, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 3, + 6, + 1, + 4, + 5, + 7, + 38, + 79, + 223, + 346, + 171, + 72, + 23, + 8, + 2, + 4, + 2, + 1, + 2 + ], + "bin_edges": [ + -0.0722241997718811, + -0.06523004174232483, + -0.05823588743805885, + -0.05124173313379288, + -0.0442475751042366, + -0.03725342079997063, + -0.03025926649570465, + -0.023265108466148376, + -0.0162709541618824, + -0.009276799857616425, + -0.00228264182806015, + 0.004711516201496124, + 0.011705666780471802, + 0.018699824810028076, + 0.02569398283958435, + 0.03268813341856003, + 0.0396822914481163, + 0.04667644947767258, + 0.053670600056648254, + 0.06066475808620453, + 0.0676589161157608 + ] + } + }, + "transformer.layers.25.3.g": { + "min": 0.38035547733306885, + "max": 1.3902052640914917, + "mean": 1.066498041152954, + "std": 0.21949008107185364, + "abs_mean": 1.066498041152954, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 34.842498779296875, + "elements": 1024, + "histogram": { + "counts": [ + 2, + 7, + 11, + 24, + 24, + 30, + 16, + 20, + 26, + 38, + 39, + 43, + 59, + 60, + 89, + 119, + 174, + 135, + 69, + 15 + ], + "bin_edges": [ + 0.38035547733306885, + 0.43084797263145447, + 0.4813404679298401, + 0.5318329334259033, + 0.5823254585266113, + 0.6328179240226746, + 0.6833103895187378, + 0.7338029146194458, + 0.784295380115509, + 0.8347878456115723, + 0.8852803707122803, + 0.9357728362083435, + 0.9862653017044067, + 1.0367578268051147, + 1.0872502326965332, + 1.1377427577972412, + 1.1882352828979492, + 1.2387278079986572, + 1.2892203330993652, + 1.3397127389907837, + 1.3902052640914917 + ] + } + }, + "transformer.layers.25.4.ff.0.0.weight": { + "min": -0.6164002418518066, + "max": 0.7182905673980713, + "mean": 0.00011321296915411949, + "std": 0.05802781134843826, + "abs_mean": 0.0431867316365242, + "sparsity": 0.0, + "shape": [ + 4096, + 1024 + ], + "norm": 118.82687377929688, + "elements": 4194304, + "histogram": { + "counts": [ + 1, + 3, + 8, + 11, + 27, + 53, + 87, + 150, + 198, + 193, + 132, + 66, + 42, + 20, + 4, + 3, + 1, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.22690832614898682, + -0.20144495368003845, + -0.1759815663099289, + -0.15051819384098053, + -0.12505480647087097, + -0.09959143400192261, + -0.07412806153297424, + -0.048664674162864685, + -0.02320130169391632, + 0.0022620707750320435, + 0.0277254581451416, + 0.053188830614089966, + 0.07865220308303833, + 0.1041155755519867, + 0.12957897782325745, + 0.1550423502922058, + 0.18050572276115417, + 0.20596909523010254, + 0.2314324676990509, + 0.25689586997032166, + 0.2823592722415924 + ] + } + }, + "transformer.layers.25.4.ff.0.0.bias": { + "min": -0.2184617668390274, + "max": 0.22462666034698486, + "mean": 0.006169781554490328, + "std": 0.04965030029416084, + "abs_mean": 0.035798974335193634, + "sparsity": 0.0, + "shape": [ + 4096 + ], + "norm": 3.201674222946167, + "elements": 4096, + "histogram": { + "counts": [ + 1, + 1, + 1, + 0, + 4, + 13, + 27, + 78, + 190, + 228, + 194, + 93, + 54, + 30, + 30, + 26, + 20, + 6, + 3, + 1 + ], + "bin_edges": [ + -0.20836390554904938, + -0.18725517392158508, + -0.16614645719528198, + -0.1450377255678177, + -0.1239289939403534, + -0.1028202623128891, + -0.081711545586586, + -0.060602813959121704, + -0.03949408233165741, + -0.018385350704193115, + 0.002723380923271179, + 0.02383209764957428, + 0.04494081437587738, + 0.06604956090450287, + 0.08715827763080597, + 0.10826702415943146, + 0.12937574088573456, + 0.15048445761203766, + 0.17159320414066315, + 0.19270192086696625, + 0.21381068229675293 + ] + } + }, + "transformer.layers.25.4.ff.2.weight": { + "min": -0.6297575831413269, + "max": 0.8895801901817322, + "mean": 1.2445923857740127e-05, + "std": 0.023545311763882637, + "abs_mean": 0.015362618491053581, + "sparsity": 0.0, + "shape": [ + 1024, + 4096 + ], + "norm": 48.21366500854492, + "elements": 4194304, + "histogram": { + "counts": [ + 5, + 3, + 5, + 35, + 229, + 504, + 180, + 29, + 4, + 4, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 + ], + "bin_edges": [ + -0.12258616834878922, + -0.10009890794754028, + -0.07761164009571075, + -0.05512437969446182, + -0.032637111842632294, + -0.010149843990802765, + 0.01233740895986557, + 0.0348246768116951, + 0.05731194466352463, + 0.07979921251535416, + 0.10228648036718369, + 0.12477373331785202, + 0.14726099371910095, + 0.16974827647209167, + 0.19223552942276, + 0.21472281217575073, + 0.23721006512641907, + 0.2596973180770874, + 0.2821846008300781, + 0.30467185378074646, + 0.3271591365337372 + ] + } + }, + "transformer.layers.25.4.ff.2.bias": { + "min": -0.506031334400177, + "max": 0.47297078371047974, + "mean": -0.0030135007109493017, + "std": 0.0691458210349083, + "abs_mean": 0.029873624444007874, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 2.213686943054199, + "elements": 1024, + "histogram": { + "counts": [ + 4, + 4, + 2, + 2, + 2, + 3, + 1, + 6, + 6, + 201, + 670, + 87, + 1, + 2, + 0, + 1, + 1, + 1, + 1, + 5 + ], + "bin_edges": [ + -0.506031334400177, + -0.45708122849464417, + -0.40813112258911133, + -0.3591810166835785, + -0.31023091077804565, + -0.2612808048725128, + -0.21233069896697998, + -0.16338059306144714, + -0.1144304871559143, + -0.06548038125038147, + -0.016530275344848633, + 0.03241986036300659, + 0.08136993646621704, + 0.1303200125694275, + 0.17927014827728271, + 0.22822028398513794, + 0.2771703600883484, + 0.32612043619155884, + 0.37507057189941406, + 0.4240207076072693, + 0.47297078371047974 + ] + } + }, + "transformer.norm_out.g": { + "min": 0.5383259057998657, + "max": 1.1772801876068115, + "mean": 0.7824772596359253, + "std": 0.09824033081531525, + "abs_mean": 0.7824772596359253, + "sparsity": 0.0, + "shape": [ + 1024 + ], + "norm": 25.23565673828125, + "elements": 1024, + "histogram": { + "counts": [ + 3, + 19, + 63, + 60, + 66, + 97, + 101, + 119, + 118, + 123, + 91, + 66, + 35, + 21, + 13, + 2, + 2, + 0, + 0, + 1 + ], + "bin_edges": [ + 0.5383259057998657, + 0.5702736377716064, + 0.6022213101387024, + 0.6341690421104431, + 0.6661167740821838, + 0.6980644464492798, + 0.7300121784210205, + 0.7619599103927612, + 0.793907642364502, + 0.8258553147315979, + 0.8578030467033386, + 0.8897507190704346, + 0.9216984510421753, + 0.953646183013916, + 0.9855939149856567, + 1.0175416469573975, + 1.0494892597198486, + 1.081437110900879, + 1.11338472366333, + 1.1453324556350708, + 1.1772801876068115 + ] + } + }, + "transformer.proj_out.weight": { + "min": -0.26664498448371887, + "max": 0.2126948982477188, + "mean": -0.00022273289505392313, + "std": 0.05400582030415535, + "abs_mean": 0.043136853724718094, + "sparsity": 0.0, + "shape": [ + 100, + 1024 + ], + "norm": 17.281917572021484, + "elements": 102400, + "histogram": { + "counts": [ + 3, + 3, + 7, + 10, + 28, + 36, + 64, + 82, + 123, + 132, + 126, + 130, + 94, + 71, + 41, + 28, + 11, + 9, + 0, + 2 + ], + "bin_edges": [ + -0.18576863408088684, + -0.16740620136260986, + -0.1490437537431717, + -0.13068132102489471, + -0.11231888085603714, + -0.09395644068717957, + -0.07559400796890259, + -0.057231560349464417, + -0.03886912763118744, + -0.02050669491291046, + -0.00214424729347229, + 0.016218185424804688, + 0.034580618143081665, + 0.052943065762519836, + 0.07130551338195801, + 0.08966794610023499, + 0.10803037881851196, + 0.12639281153678894, + 0.14475524425506592, + 0.16311770677566528, + 0.18148015439510345 + ] + } + }, + "transformer.proj_out.bias": { + "min": -0.23798410594463348, + "max": 0.014864158816635609, + "mean": -0.04389958456158638, + "std": 0.03423725813627243, + "abs_mean": 0.045042671263217926, + "sparsity": 0.0, + "shape": [ + 100 + ], + "norm": 0.5556654930114746, + "elements": 100 + } + }, + "layer_importance_scores": { + "transformer.time_embed.time_mlp.0.weight": 85.03633259568599, + "transformer.time_embed.time_mlp.0.bias": 85.0001419242019, + "transformer.time_embed.time_mlp.2.weight": 85.14533038274396, + "transformer.time_embed.time_mlp.2.bias": 85.0001419242019, + "transformer.text_embed.text_embed.weight": 85.03528701347982, + "transformer.input_embed.proj.weight": 80.04257726056952, + "transformer.input_embed.proj.bias": 80.0001419242019, + "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 85.28157761656642, + "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 46.03894230581882, + "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 85.28157761656642, + "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 51.933818153226454, + "transformer.layers.0.1.g": 11.532497627358527, + "transformer.layers.0.2.to_q.weight": 60.14533038274396, + "transformer.layers.0.2.to_q.bias": 60.0001419242019, + "transformer.layers.0.2.to_k.weight": 60.14533038274396, + "transformer.layers.0.2.to_k.bias": 60.0001419242019, + "transformer.layers.0.2.to_v.weight": 60.14533038274396, + "transformer.layers.0.2.to_v.bias": 60.0001419242019, + "transformer.layers.0.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.0.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.0.3.g": 11.959340362928835, + "transformer.layers.0.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.0.4.ff.0.0.bias": 24.463798488610298, + "transformer.layers.0.4.ff.2.weight": 60.58132153097584, + "transformer.layers.0.4.ff.2.bias": 23.219212211070626, + "transformer.layers.1.1.g": 11.537370238647217, + "transformer.layers.1.2.to_q.weight": 60.14533038274396, + "transformer.layers.1.2.to_q.bias": 60.0001419242019, + "transformer.layers.1.2.to_k.weight": 60.14533038274396, + "transformer.layers.1.2.to_k.bias": 60.0001419242019, + "transformer.layers.1.2.to_v.weight": 60.14533038274396, + "transformer.layers.1.2.to_v.bias": 60.0001419242019, + "transformer.layers.1.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.1.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.1.3.g": 11.461919656381479, + "transformer.layers.1.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.1.4.ff.0.0.bias": 18.705593070190794, + "transformer.layers.1.4.ff.2.weight": 60.58132153097584, + "transformer.layers.1.4.ff.2.bias": 38.259093163379504, + "transformer.layers.2.1.g": 11.321299503885337, + "transformer.layers.2.2.to_q.weight": 60.14533038274396, + "transformer.layers.2.2.to_q.bias": 60.0001419242019, + "transformer.layers.2.2.to_k.weight": 60.14533038274396, + "transformer.layers.2.2.to_k.bias": 60.0001419242019, + "transformer.layers.2.2.to_v.weight": 60.14533038274396, + "transformer.layers.2.2.to_v.bias": 60.0001419242019, + "transformer.layers.2.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.2.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.2.3.g": 11.455490462959915, + "transformer.layers.2.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.2.4.ff.0.0.bias": 18.206288585332846, + "transformer.layers.2.4.ff.2.weight": 60.58132153097584, + "transformer.layers.2.4.ff.2.bias": 60.0001419242019, + "transformer.layers.3.1.g": 11.123831840415704, + "transformer.layers.3.2.to_q.weight": 60.14533038274396, + "transformer.layers.3.2.to_q.bias": 60.0001419242019, + "transformer.layers.3.2.to_k.weight": 60.14533038274396, + "transformer.layers.3.2.to_k.bias": 60.0001419242019, + "transformer.layers.3.2.to_v.weight": 60.14533038274396, + "transformer.layers.3.2.to_v.bias": 60.0001419242019, + "transformer.layers.3.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.3.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.3.3.g": 11.376823885895679, + "transformer.layers.3.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.3.4.ff.0.0.bias": 18.229989548202674, + "transformer.layers.3.4.ff.2.weight": 60.58132153097584, + "transformer.layers.3.4.ff.2.bias": 60.0001419242019, + "transformer.layers.4.1.g": 10.92711471707068, + "transformer.layers.4.2.to_q.weight": 60.14533038274396, + "transformer.layers.4.2.to_q.bias": 60.0001419242019, + "transformer.layers.4.2.to_k.weight": 60.14533038274396, + "transformer.layers.4.2.to_k.bias": 60.0001419242019, + "transformer.layers.4.2.to_v.weight": 60.14533038274396, + "transformer.layers.4.2.to_v.bias": 60.0001419242019, + "transformer.layers.4.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.4.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.4.3.g": 10.986938392283891, + "transformer.layers.4.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.4.4.ff.0.0.bias": 17.15491654224864, + "transformer.layers.4.4.ff.2.weight": 60.58132153097584, + "transformer.layers.4.4.ff.2.bias": 60.0001419242019, + "transformer.layers.5.1.g": 10.90192046488536, + "transformer.layers.5.2.to_q.weight": 60.14533038274396, + "transformer.layers.5.2.to_q.bias": 60.0001419242019, + "transformer.layers.5.2.to_k.weight": 60.14533038274396, + "transformer.layers.5.2.to_k.bias": 60.0001419242019, + "transformer.layers.5.2.to_v.weight": 60.14533038274396, + "transformer.layers.5.2.to_v.bias": 60.0001419242019, + "transformer.layers.5.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.5.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.5.3.g": 10.853532619025422, + "transformer.layers.5.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.5.4.ff.0.0.bias": 17.052553682686636, + "transformer.layers.5.4.ff.2.weight": 60.58132153097584, + "transformer.layers.5.4.ff.2.bias": 60.0001419242019, + "transformer.layers.6.1.g": 10.874520652442698, + "transformer.layers.6.2.to_q.weight": 60.14533038274396, + "transformer.layers.6.2.to_q.bias": 60.0001419242019, + "transformer.layers.6.2.to_k.weight": 60.14533038274396, + "transformer.layers.6.2.to_k.bias": 60.0001419242019, + "transformer.layers.6.2.to_v.weight": 60.14533038274396, + "transformer.layers.6.2.to_v.bias": 60.0001419242019, + "transformer.layers.6.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.6.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.6.3.g": 10.785119275660719, + "transformer.layers.6.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.6.4.ff.0.0.bias": 16.446733745711406, + "transformer.layers.6.4.ff.2.weight": 60.58132153097584, + "transformer.layers.6.4.ff.2.bias": 60.0001419242019, + "transformer.layers.7.1.g": 10.737621414760007, + "transformer.layers.7.2.to_q.weight": 60.14533038274396, + "transformer.layers.7.2.to_q.bias": 60.0001419242019, + "transformer.layers.7.2.to_k.weight": 60.14533038274396, + "transformer.layers.7.2.to_k.bias": 60.0001419242019, + "transformer.layers.7.2.to_v.weight": 60.14533038274396, + "transformer.layers.7.2.to_v.bias": 60.0001419242019, + "transformer.layers.7.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.7.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.7.3.g": 10.77605944709332, + "transformer.layers.7.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.7.4.ff.0.0.bias": 16.069873949263954, + "transformer.layers.7.4.ff.2.weight": 60.58132153097584, + "transformer.layers.7.4.ff.2.bias": 60.0001419242019, + "transformer.layers.8.1.g": 10.71839406869809, + "transformer.layers.8.2.to_q.weight": 60.14533038274396, + "transformer.layers.8.2.to_q.bias": 60.0001419242019, + "transformer.layers.8.2.to_k.weight": 60.14533038274396, + "transformer.layers.8.2.to_k.bias": 60.0001419242019, + "transformer.layers.8.2.to_v.weight": 60.14533038274396, + "transformer.layers.8.2.to_v.bias": 60.0001419242019, + "transformer.layers.8.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.8.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.8.3.g": 10.7574903396269, + "transformer.layers.8.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.8.4.ff.0.0.bias": 15.766046880990155, + "transformer.layers.8.4.ff.2.weight": 60.58132153097584, + "transformer.layers.8.4.ff.2.bias": 60.0001419242019, + "transformer.layers.9.1.g": 10.733062207419012, + "transformer.layers.9.2.to_q.weight": 60.14533038274396, + "transformer.layers.9.2.to_q.bias": 60.0001419242019, + "transformer.layers.9.2.to_k.weight": 60.14533038274396, + "transformer.layers.9.2.to_k.bias": 60.0001419242019, + "transformer.layers.9.2.to_v.weight": 60.14533038274396, + "transformer.layers.9.2.to_v.bias": 60.0001419242019, + "transformer.layers.9.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.9.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.9.3.g": 10.823180888090741, + "transformer.layers.9.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.9.4.ff.0.0.bias": 15.847623263476386, + "transformer.layers.9.4.ff.2.weight": 60.58132153097584, + "transformer.layers.9.4.ff.2.bias": 60.0001419242019, + "transformer.layers.10.1.g": 10.716516084338693, + "transformer.layers.10.2.to_q.weight": 60.14533038274396, + "transformer.layers.10.2.to_q.bias": 60.0001419242019, + "transformer.layers.10.2.to_k.weight": 60.14533038274396, + "transformer.layers.10.2.to_k.bias": 60.0001419242019, + "transformer.layers.10.2.to_v.weight": 60.14533038274396, + "transformer.layers.10.2.to_v.bias": 60.0001419242019, + "transformer.layers.10.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.10.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.10.3.g": 10.76196498496441, + "transformer.layers.10.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.10.4.ff.0.0.bias": 15.759326001570306, + "transformer.layers.10.4.ff.2.weight": 60.58132153097584, + "transformer.layers.10.4.ff.2.bias": 60.0001419242019, + "transformer.layers.11.1.g": 10.000141924201898, + "transformer.layers.11.2.to_q.weight": 60.14533038274396, + "transformer.layers.11.2.to_q.bias": 60.0001419242019, + "transformer.layers.11.2.to_k.weight": 60.14533038274396, + "transformer.layers.11.2.to_k.bias": 60.0001419242019, + "transformer.layers.11.2.to_v.weight": 0.14533038274395965, + "transformer.layers.11.2.to_v.bias": 0.0001419242018983981, + "transformer.layers.11.2.to_out.0.weight": 0.14533038274395965, + "transformer.layers.11.2.to_out.0.bias": 0.0001419242018983981, + "transformer.layers.11.3.g": 10.000141924201898, + "transformer.layers.11.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.11.4.ff.0.0.bias": 60.00056769680759, + "transformer.layers.11.4.ff.2.weight": 0.5813215309758386, + "transformer.layers.11.4.ff.2.bias": 0.0001419242018983981, + "transformer.layers.12.1.g": 10.665369544246982, + "transformer.layers.12.2.to_q.weight": 60.14533038274396, + "transformer.layers.12.2.to_q.bias": 60.0001419242019, + "transformer.layers.12.2.to_k.weight": 60.14533038274396, + "transformer.layers.12.2.to_k.bias": 60.0001419242019, + "transformer.layers.12.2.to_v.weight": 60.14533038274396, + "transformer.layers.12.2.to_v.bias": 60.0001419242019, + "transformer.layers.12.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.12.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.12.3.g": 10.74358912251763, + "transformer.layers.12.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.12.4.ff.0.0.bias": 15.775313068968435, + "transformer.layers.12.4.ff.2.weight": 60.58132153097584, + "transformer.layers.12.4.ff.2.bias": 60.0001419242019, + "transformer.layers.13.0.weight": 60.29066076548792, + "transformer.layers.13.1.g": 10.780254387046963, + "transformer.layers.13.2.to_q.weight": 60.14533038274396, + "transformer.layers.13.2.to_q.bias": 60.0001419242019, + "transformer.layers.13.2.to_k.weight": 60.14533038274396, + "transformer.layers.13.2.to_k.bias": 60.0001419242019, + "transformer.layers.13.2.to_v.weight": 60.14533038274396, + "transformer.layers.13.2.to_v.bias": 60.0001419242019, + "transformer.layers.13.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.13.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.13.3.g": 10.723090044959745, + "transformer.layers.13.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.13.4.ff.0.0.bias": 14.433770644846359, + "transformer.layers.13.4.ff.2.weight": 60.58132153097584, + "transformer.layers.13.4.ff.2.bias": 60.0001419242019, + "transformer.layers.14.0.weight": 50.29554357798792, + "transformer.layers.14.1.g": 10.000141924201898, + "transformer.layers.14.2.to_q.weight": 60.14533038274396, + "transformer.layers.14.2.to_q.bias": 60.0001419242019, + "transformer.layers.14.2.to_k.weight": 60.14532084600079, + "transformer.layers.14.2.to_k.bias": 60.0001419242019, + "transformer.layers.14.2.to_v.weight": 0.14533038274395965, + "transformer.layers.14.2.to_v.bias": 0.0001419242018983981, + "transformer.layers.14.2.to_out.0.weight": 0.14533038274395965, + "transformer.layers.14.2.to_out.0.bias": 0.0001419242018983981, + "transformer.layers.14.3.g": 10.000141924201898, + "transformer.layers.14.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.14.4.ff.0.0.bias": 60.00056769680759, + "transformer.layers.14.4.ff.2.weight": 0.5813215309758386, + "transformer.layers.14.4.ff.2.bias": 0.0001419242018983981, + "transformer.layers.15.0.weight": 60.29066076548792, + "transformer.layers.15.1.g": 10.786727982493778, + "transformer.layers.15.2.to_q.weight": 60.14533038274396, + "transformer.layers.15.2.to_q.bias": 60.0001419242019, + "transformer.layers.15.2.to_k.weight": 60.14533038274396, + "transformer.layers.15.2.to_k.bias": 60.0001419242019, + "transformer.layers.15.2.to_v.weight": 60.14533038274396, + "transformer.layers.15.2.to_v.bias": 60.0001419242019, + "transformer.layers.15.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.15.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.15.3.g": 10.558397360427929, + "transformer.layers.15.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.15.4.ff.0.0.bias": 14.440700574156022, + "transformer.layers.15.4.ff.2.weight": 60.58132153097584, + "transformer.layers.15.4.ff.2.bias": 60.0001419242019, + "transformer.layers.16.0.weight": 60.29066076548792, + "transformer.layers.16.1.g": 10.749619248285043, + "transformer.layers.16.2.to_q.weight": 60.14533038274396, + "transformer.layers.16.2.to_q.bias": 60.0001419242019, + "transformer.layers.16.2.to_k.weight": 60.14533038274396, + "transformer.layers.16.2.to_k.bias": 60.0001419242019, + "transformer.layers.16.2.to_v.weight": 60.14533038274396, + "transformer.layers.16.2.to_v.bias": 60.0001419242019, + "transformer.layers.16.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.16.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.16.3.g": 10.533921774332137, + "transformer.layers.16.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.16.4.ff.0.0.bias": 15.171028483803623, + "transformer.layers.16.4.ff.2.weight": 60.58132153097584, + "transformer.layers.16.4.ff.2.bias": 60.0001419242019, + "transformer.layers.17.0.weight": 60.29066076548792, + "transformer.layers.17.1.g": 10.770886668827734, + "transformer.layers.17.2.to_q.weight": 60.14533038274396, + "transformer.layers.17.2.to_q.bias": 60.0001419242019, + "transformer.layers.17.2.to_k.weight": 60.14533038274396, + "transformer.layers.17.2.to_k.bias": 60.0001419242019, + "transformer.layers.17.2.to_v.weight": 60.14533038274396, + "transformer.layers.17.2.to_v.bias": 60.0001419242019, + "transformer.layers.17.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.17.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.17.3.g": 10.494831667837571, + "transformer.layers.17.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.17.4.ff.0.0.bias": 15.415363766846895, + "transformer.layers.17.4.ff.2.weight": 60.58132153097584, + "transformer.layers.17.4.ff.2.bias": 60.0001419242019, + "transformer.layers.18.0.weight": 60.29066076548792, + "transformer.layers.18.1.g": 10.692741576840318, + "transformer.layers.18.2.to_q.weight": 60.14533038274396, + "transformer.layers.18.2.to_q.bias": 60.0001419242019, + "transformer.layers.18.2.to_k.weight": 60.14533038274396, + "transformer.layers.18.2.to_k.bias": 60.0001419242019, + "transformer.layers.18.2.to_v.weight": 60.14533038274396, + "transformer.layers.18.2.to_v.bias": 60.0001419242019, + "transformer.layers.18.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.18.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.18.3.g": 10.514765366862893, + "transformer.layers.18.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.18.4.ff.0.0.bias": 15.916675791742597, + "transformer.layers.18.4.ff.2.weight": 60.58132153097584, + "transformer.layers.18.4.ff.2.bias": 60.0001419242019, + "transformer.layers.19.0.weight": 60.29066076548792, + "transformer.layers.19.1.g": 10.767040605554461, + "transformer.layers.19.2.to_q.weight": 60.14533038274396, + "transformer.layers.19.2.to_q.bias": 60.0001419242019, + "transformer.layers.19.2.to_k.weight": 60.14533038274396, + "transformer.layers.19.2.to_k.bias": 60.0001419242019, + "transformer.layers.19.2.to_v.weight": 60.14533038274396, + "transformer.layers.19.2.to_v.bias": 60.0001419242019, + "transformer.layers.19.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.19.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.19.3.g": 10.55900162466435, + "transformer.layers.19.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.19.4.ff.0.0.bias": 16.944800765454332, + "transformer.layers.19.4.ff.2.weight": 60.58132153097584, + "transformer.layers.19.4.ff.2.bias": 60.0001419242019, + "transformer.layers.20.0.weight": 60.29066076548792, + "transformer.layers.20.1.g": 10.798194311900344, + "transformer.layers.20.2.to_q.weight": 60.14533038274396, + "transformer.layers.20.2.to_q.bias": 60.0001419242019, + "transformer.layers.20.2.to_k.weight": 60.14533038274396, + "transformer.layers.20.2.to_k.bias": 60.0001419242019, + "transformer.layers.20.2.to_v.weight": 60.14533038274396, + "transformer.layers.20.2.to_v.bias": 60.0001419242019, + "transformer.layers.20.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.20.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.20.3.g": 10.533009943982734, + "transformer.layers.20.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.20.4.ff.0.0.bias": 17.8252610437578, + "transformer.layers.20.4.ff.2.weight": 60.58132153097584, + "transformer.layers.20.4.ff.2.bias": 60.0001419242019, + "transformer.layers.21.0.weight": 60.29066076548792, + "transformer.layers.21.1.g": 10.834385789775144, + "transformer.layers.21.2.to_q.weight": 60.14533038274396, + "transformer.layers.21.2.to_q.bias": 60.0001419242019, + "transformer.layers.21.2.to_k.weight": 60.14533038274396, + "transformer.layers.21.2.to_k.bias": 60.0001419242019, + "transformer.layers.21.2.to_v.weight": 60.14533038274396, + "transformer.layers.21.2.to_v.bias": 60.0001419242019, + "transformer.layers.21.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.21.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.21.3.g": 10.61522933626405, + "transformer.layers.21.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.21.4.ff.0.0.bias": 17.882948718734298, + "transformer.layers.21.4.ff.2.weight": 60.58132153097584, + "transformer.layers.21.4.ff.2.bias": 60.0001419242019, + "transformer.layers.22.0.weight": 60.29066076548792, + "transformer.layers.22.1.g": 10.873351189083357, + "transformer.layers.22.2.to_q.weight": 60.14533038274396, + "transformer.layers.22.2.to_q.bias": 60.0001419242019, + "transformer.layers.22.2.to_k.weight": 60.14533038274396, + "transformer.layers.22.2.to_k.bias": 60.0001419242019, + "transformer.layers.22.2.to_v.weight": 60.14533038274396, + "transformer.layers.22.2.to_v.bias": 60.0001419242019, + "transformer.layers.22.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.22.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.22.3.g": 10.738055947202142, + "transformer.layers.22.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.22.4.ff.0.0.bias": 20.820610760559745, + "transformer.layers.22.4.ff.2.weight": 60.58132153097584, + "transformer.layers.22.4.ff.2.bias": 60.0001419242019, + "transformer.layers.23.0.weight": 60.29066076548792, + "transformer.layers.23.1.g": 10.960286446122295, + "transformer.layers.23.2.to_q.weight": 60.14533038274396, + "transformer.layers.23.2.to_q.bias": 60.0001419242019, + "transformer.layers.23.2.to_k.weight": 60.14533038274396, + "transformer.layers.23.2.to_k.bias": 60.0001419242019, + "transformer.layers.23.2.to_v.weight": 60.14533038274396, + "transformer.layers.23.2.to_v.bias": 60.0001419242019, + "transformer.layers.23.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.23.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.23.3.g": 10.717452300054163, + "transformer.layers.23.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.23.4.ff.0.0.bias": 21.784515291168272, + "transformer.layers.23.4.ff.2.weight": 60.58132153097584, + "transformer.layers.23.4.ff.2.bias": 60.0001419242019, + "transformer.layers.24.0.weight": 60.29066076548792, + "transformer.layers.24.1.g": 11.384080906248242, + "transformer.layers.24.2.to_q.weight": 60.14533038274396, + "transformer.layers.24.2.to_q.bias": 60.0001419242019, + "transformer.layers.24.2.to_k.weight": 60.14533038274396, + "transformer.layers.24.2.to_k.bias": 60.0001419242019, + "transformer.layers.24.2.to_v.weight": 60.14533038274396, + "transformer.layers.24.2.to_v.bias": 60.0001419242019, + "transformer.layers.24.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.24.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.24.3.g": 11.157178725401591, + "transformer.layers.24.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.24.4.ff.0.0.bias": 23.328577211228623, + "transformer.layers.24.4.ff.2.weight": 60.58132153097584, + "transformer.layers.24.4.ff.2.bias": 60.0001419242019, + "transformer.layers.25.0.weight": 60.29066076548792, + "transformer.layers.25.1.g": 12.176959419250716, + "transformer.layers.25.2.to_q.weight": 60.14533038274396, + "transformer.layers.25.2.to_q.bias": 60.0001419242019, + "transformer.layers.25.2.to_k.weight": 60.14533038274396, + "transformer.layers.25.2.to_k.bias": 60.0001419242019, + "transformer.layers.25.2.to_v.weight": 60.14533038274396, + "transformer.layers.25.2.to_v.bias": 60.0001419242019, + "transformer.layers.25.2.to_out.0.weight": 60.14533038274396, + "transformer.layers.25.2.to_out.0.bias": 60.0001419242019, + "transformer.layers.25.3.g": 12.058186783192642, + "transformer.layers.25.4.ff.0.0.weight": 60.58132153097584, + "transformer.layers.25.4.ff.0.0.bias": 60.00056769680759, + "transformer.layers.25.4.ff.2.weight": 60.58132153097584, + "transformer.layers.25.4.ff.2.bias": 60.0001419242019, + "transformer.norm_out.g": 26.255645897071666, + "transformer.proj_out.weight": 80.01419242018984, + "transformer.proj_out.bias": 37.799007512730114 + } +} \ No newline at end of file