{ "layer_types": { "transformer": 391 }, "parameter_counts": { "transformer.time_embed.time_mlp.0.weight": 262144, "transformer.time_embed.time_mlp.0.bias": 1024, "transformer.time_embed.time_mlp.2.weight": 1048576, "transformer.time_embed.time_mlp.2.bias": 1024, "transformer.text_embed.text_embed.weight": 254600, "transformer.input_embed.proj.weight": 307200, "transformer.input_embed.proj.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024, "transformer.layers.0.1.g": 1024, "transformer.layers.0.2.to_q.weight": 1048576, "transformer.layers.0.2.to_q.bias": 1024, "transformer.layers.0.2.to_k.weight": 1048576, "transformer.layers.0.2.to_k.bias": 1024, "transformer.layers.0.2.to_v.weight": 1048576, "transformer.layers.0.2.to_v.bias": 1024, "transformer.layers.0.2.to_out.0.weight": 1048576, "transformer.layers.0.2.to_out.0.bias": 1024, "transformer.layers.0.3.g": 1024, "transformer.layers.0.4.ff.0.0.weight": 4194304, "transformer.layers.0.4.ff.0.0.bias": 4096, "transformer.layers.0.4.ff.2.weight": 4194304, "transformer.layers.0.4.ff.2.bias": 1024, "transformer.layers.1.1.g": 1024, "transformer.layers.1.2.to_q.weight": 1048576, "transformer.layers.1.2.to_q.bias": 1024, "transformer.layers.1.2.to_k.weight": 1048576, "transformer.layers.1.2.to_k.bias": 1024, "transformer.layers.1.2.to_v.weight": 1048576, "transformer.layers.1.2.to_v.bias": 1024, "transformer.layers.1.2.to_out.0.weight": 1048576, "transformer.layers.1.2.to_out.0.bias": 1024, "transformer.layers.1.3.g": 1024, "transformer.layers.1.4.ff.0.0.weight": 4194304, "transformer.layers.1.4.ff.0.0.bias": 4096, "transformer.layers.1.4.ff.2.weight": 4194304, "transformer.layers.1.4.ff.2.bias": 1024, "transformer.layers.2.1.g": 1024, "transformer.layers.2.2.to_q.weight": 1048576, "transformer.layers.2.2.to_q.bias": 1024, "transformer.layers.2.2.to_k.weight": 1048576, "transformer.layers.2.2.to_k.bias": 1024, "transformer.layers.2.2.to_v.weight": 1048576, "transformer.layers.2.2.to_v.bias": 1024, "transformer.layers.2.2.to_out.0.weight": 1048576, "transformer.layers.2.2.to_out.0.bias": 1024, "transformer.layers.2.3.g": 1024, "transformer.layers.2.4.ff.0.0.weight": 4194304, "transformer.layers.2.4.ff.0.0.bias": 4096, "transformer.layers.2.4.ff.2.weight": 4194304, "transformer.layers.2.4.ff.2.bias": 1024, "transformer.layers.3.1.g": 1024, "transformer.layers.3.2.to_q.weight": 1048576, "transformer.layers.3.2.to_q.bias": 1024, "transformer.layers.3.2.to_k.weight": 1048576, "transformer.layers.3.2.to_k.bias": 1024, "transformer.layers.3.2.to_v.weight": 1048576, "transformer.layers.3.2.to_v.bias": 1024, "transformer.layers.3.2.to_out.0.weight": 1048576, "transformer.layers.3.2.to_out.0.bias": 1024, "transformer.layers.3.3.g": 1024, "transformer.layers.3.4.ff.0.0.weight": 4194304, "transformer.layers.3.4.ff.0.0.bias": 4096, "transformer.layers.3.4.ff.2.weight": 4194304, "transformer.layers.3.4.ff.2.bias": 1024, "transformer.layers.4.1.g": 1024, "transformer.layers.4.2.to_q.weight": 1048576, "transformer.layers.4.2.to_q.bias": 1024, "transformer.layers.4.2.to_k.weight": 1048576, "transformer.layers.4.2.to_k.bias": 1024, "transformer.layers.4.2.to_v.weight": 1048576, "transformer.layers.4.2.to_v.bias": 1024, "transformer.layers.4.2.to_out.0.weight": 1048576, "transformer.layers.4.2.to_out.0.bias": 1024, "transformer.layers.4.3.g": 1024, "transformer.layers.4.4.ff.0.0.weight": 4194304, "transformer.layers.4.4.ff.0.0.bias": 4096, "transformer.layers.4.4.ff.2.weight": 4194304, "transformer.layers.4.4.ff.2.bias": 1024, "transformer.layers.5.1.g": 1024, "transformer.layers.5.2.to_q.weight": 1048576, "transformer.layers.5.2.to_q.bias": 1024, "transformer.layers.5.2.to_k.weight": 1048576, "transformer.layers.5.2.to_k.bias": 1024, "transformer.layers.5.2.to_v.weight": 1048576, "transformer.layers.5.2.to_v.bias": 1024, "transformer.layers.5.2.to_out.0.weight": 1048576, "transformer.layers.5.2.to_out.0.bias": 1024, "transformer.layers.5.3.g": 1024, "transformer.layers.5.4.ff.0.0.weight": 4194304, "transformer.layers.5.4.ff.0.0.bias": 4096, "transformer.layers.5.4.ff.2.weight": 4194304, "transformer.layers.5.4.ff.2.bias": 1024, "transformer.layers.6.1.g": 1024, "transformer.layers.6.2.to_q.weight": 1048576, "transformer.layers.6.2.to_q.bias": 1024, "transformer.layers.6.2.to_k.weight": 1048576, "transformer.layers.6.2.to_k.bias": 1024, "transformer.layers.6.2.to_v.weight": 1048576, "transformer.layers.6.2.to_v.bias": 1024, "transformer.layers.6.2.to_out.0.weight": 1048576, "transformer.layers.6.2.to_out.0.bias": 1024, "transformer.layers.6.3.g": 1024, "transformer.layers.6.4.ff.0.0.weight": 4194304, "transformer.layers.6.4.ff.0.0.bias": 4096, "transformer.layers.6.4.ff.2.weight": 4194304, "transformer.layers.6.4.ff.2.bias": 1024, "transformer.layers.7.1.g": 1024, "transformer.layers.7.2.to_q.weight": 1048576, "transformer.layers.7.2.to_q.bias": 1024, "transformer.layers.7.2.to_k.weight": 1048576, "transformer.layers.7.2.to_k.bias": 1024, "transformer.layers.7.2.to_v.weight": 1048576, "transformer.layers.7.2.to_v.bias": 1024, "transformer.layers.7.2.to_out.0.weight": 1048576, "transformer.layers.7.2.to_out.0.bias": 1024, "transformer.layers.7.3.g": 1024, "transformer.layers.7.4.ff.0.0.weight": 4194304, "transformer.layers.7.4.ff.0.0.bias": 4096, "transformer.layers.7.4.ff.2.weight": 4194304, "transformer.layers.7.4.ff.2.bias": 1024, "transformer.layers.8.1.g": 1024, "transformer.layers.8.2.to_q.weight": 1048576, "transformer.layers.8.2.to_q.bias": 1024, "transformer.layers.8.2.to_k.weight": 1048576, "transformer.layers.8.2.to_k.bias": 1024, "transformer.layers.8.2.to_v.weight": 1048576, "transformer.layers.8.2.to_v.bias": 1024, "transformer.layers.8.2.to_out.0.weight": 1048576, "transformer.layers.8.2.to_out.0.bias": 1024, "transformer.layers.8.3.g": 1024, "transformer.layers.8.4.ff.0.0.weight": 4194304, "transformer.layers.8.4.ff.0.0.bias": 4096, "transformer.layers.8.4.ff.2.weight": 4194304, "transformer.layers.8.4.ff.2.bias": 1024, "transformer.layers.9.1.g": 1024, "transformer.layers.9.2.to_q.weight": 1048576, "transformer.layers.9.2.to_q.bias": 1024, "transformer.layers.9.2.to_k.weight": 1048576, "transformer.layers.9.2.to_k.bias": 1024, "transformer.layers.9.2.to_v.weight": 1048576, "transformer.layers.9.2.to_v.bias": 1024, "transformer.layers.9.2.to_out.0.weight": 1048576, "transformer.layers.9.2.to_out.0.bias": 1024, "transformer.layers.9.3.g": 1024, "transformer.layers.9.4.ff.0.0.weight": 4194304, "transformer.layers.9.4.ff.0.0.bias": 4096, "transformer.layers.9.4.ff.2.weight": 4194304, "transformer.layers.9.4.ff.2.bias": 1024, "transformer.layers.10.1.g": 1024, "transformer.layers.10.2.to_q.weight": 1048576, "transformer.layers.10.2.to_q.bias": 1024, "transformer.layers.10.2.to_k.weight": 1048576, "transformer.layers.10.2.to_k.bias": 1024, "transformer.layers.10.2.to_v.weight": 1048576, "transformer.layers.10.2.to_v.bias": 1024, "transformer.layers.10.2.to_out.0.weight": 1048576, "transformer.layers.10.2.to_out.0.bias": 1024, "transformer.layers.10.3.g": 1024, "transformer.layers.10.4.ff.0.0.weight": 4194304, "transformer.layers.10.4.ff.0.0.bias": 4096, "transformer.layers.10.4.ff.2.weight": 4194304, "transformer.layers.10.4.ff.2.bias": 1024, "transformer.layers.11.1.g": 1024, "transformer.layers.11.2.to_q.weight": 1048576, "transformer.layers.11.2.to_q.bias": 1024, "transformer.layers.11.2.to_k.weight": 1048576, "transformer.layers.11.2.to_k.bias": 1024, "transformer.layers.11.2.to_v.weight": 1048576, "transformer.layers.11.2.to_v.bias": 1024, "transformer.layers.11.2.to_out.0.weight": 1048576, "transformer.layers.11.2.to_out.0.bias": 1024, "transformer.layers.11.3.g": 1024, "transformer.layers.11.4.ff.0.0.weight": 4194304, "transformer.layers.11.4.ff.0.0.bias": 4096, "transformer.layers.11.4.ff.2.weight": 4194304, "transformer.layers.11.4.ff.2.bias": 1024, "transformer.layers.12.1.g": 1024, "transformer.layers.12.2.to_q.weight": 1048576, "transformer.layers.12.2.to_q.bias": 1024, "transformer.layers.12.2.to_k.weight": 1048576, "transformer.layers.12.2.to_k.bias": 1024, "transformer.layers.12.2.to_v.weight": 1048576, "transformer.layers.12.2.to_v.bias": 1024, "transformer.layers.12.2.to_out.0.weight": 1048576, "transformer.layers.12.2.to_out.0.bias": 1024, "transformer.layers.12.3.g": 1024, "transformer.layers.12.4.ff.0.0.weight": 4194304, "transformer.layers.12.4.ff.0.0.bias": 4096, "transformer.layers.12.4.ff.2.weight": 4194304, "transformer.layers.12.4.ff.2.bias": 1024, "transformer.layers.13.0.weight": 2097152, "transformer.layers.13.1.g": 1024, "transformer.layers.13.2.to_q.weight": 1048576, "transformer.layers.13.2.to_q.bias": 1024, "transformer.layers.13.2.to_k.weight": 1048576, "transformer.layers.13.2.to_k.bias": 1024, "transformer.layers.13.2.to_v.weight": 1048576, "transformer.layers.13.2.to_v.bias": 1024, "transformer.layers.13.2.to_out.0.weight": 1048576, "transformer.layers.13.2.to_out.0.bias": 1024, "transformer.layers.13.3.g": 1024, "transformer.layers.13.4.ff.0.0.weight": 4194304, "transformer.layers.13.4.ff.0.0.bias": 4096, "transformer.layers.13.4.ff.2.weight": 4194304, "transformer.layers.13.4.ff.2.bias": 1024, "transformer.layers.14.0.weight": 2097152, "transformer.layers.14.1.g": 1024, "transformer.layers.14.2.to_q.weight": 1048576, "transformer.layers.14.2.to_q.bias": 1024, "transformer.layers.14.2.to_k.weight": 1048576, "transformer.layers.14.2.to_k.bias": 1024, "transformer.layers.14.2.to_v.weight": 1048576, "transformer.layers.14.2.to_v.bias": 1024, "transformer.layers.14.2.to_out.0.weight": 1048576, "transformer.layers.14.2.to_out.0.bias": 1024, "transformer.layers.14.3.g": 1024, "transformer.layers.14.4.ff.0.0.weight": 4194304, "transformer.layers.14.4.ff.0.0.bias": 4096, "transformer.layers.14.4.ff.2.weight": 4194304, "transformer.layers.14.4.ff.2.bias": 1024, "transformer.layers.15.0.weight": 2097152, "transformer.layers.15.1.g": 1024, "transformer.layers.15.2.to_q.weight": 1048576, "transformer.layers.15.2.to_q.bias": 1024, "transformer.layers.15.2.to_k.weight": 1048576, "transformer.layers.15.2.to_k.bias": 1024, "transformer.layers.15.2.to_v.weight": 1048576, "transformer.layers.15.2.to_v.bias": 1024, "transformer.layers.15.2.to_out.0.weight": 1048576, "transformer.layers.15.2.to_out.0.bias": 1024, "transformer.layers.15.3.g": 1024, "transformer.layers.15.4.ff.0.0.weight": 4194304, "transformer.layers.15.4.ff.0.0.bias": 4096, "transformer.layers.15.4.ff.2.weight": 4194304, "transformer.layers.15.4.ff.2.bias": 1024, "transformer.layers.16.0.weight": 2097152, "transformer.layers.16.1.g": 1024, "transformer.layers.16.2.to_q.weight": 1048576, "transformer.layers.16.2.to_q.bias": 1024, "transformer.layers.16.2.to_k.weight": 1048576, "transformer.layers.16.2.to_k.bias": 1024, "transformer.layers.16.2.to_v.weight": 1048576, "transformer.layers.16.2.to_v.bias": 1024, "transformer.layers.16.2.to_out.0.weight": 1048576, "transformer.layers.16.2.to_out.0.bias": 1024, "transformer.layers.16.3.g": 1024, "transformer.layers.16.4.ff.0.0.weight": 4194304, "transformer.layers.16.4.ff.0.0.bias": 4096, "transformer.layers.16.4.ff.2.weight": 4194304, "transformer.layers.16.4.ff.2.bias": 1024, "transformer.layers.17.0.weight": 2097152, "transformer.layers.17.1.g": 1024, "transformer.layers.17.2.to_q.weight": 1048576, "transformer.layers.17.2.to_q.bias": 1024, "transformer.layers.17.2.to_k.weight": 1048576, "transformer.layers.17.2.to_k.bias": 1024, "transformer.layers.17.2.to_v.weight": 1048576, "transformer.layers.17.2.to_v.bias": 1024, "transformer.layers.17.2.to_out.0.weight": 1048576, "transformer.layers.17.2.to_out.0.bias": 1024, "transformer.layers.17.3.g": 1024, "transformer.layers.17.4.ff.0.0.weight": 4194304, "transformer.layers.17.4.ff.0.0.bias": 4096, "transformer.layers.17.4.ff.2.weight": 4194304, "transformer.layers.17.4.ff.2.bias": 1024, "transformer.layers.18.0.weight": 2097152, "transformer.layers.18.1.g": 1024, "transformer.layers.18.2.to_q.weight": 1048576, "transformer.layers.18.2.to_q.bias": 1024, "transformer.layers.18.2.to_k.weight": 1048576, "transformer.layers.18.2.to_k.bias": 1024, "transformer.layers.18.2.to_v.weight": 1048576, "transformer.layers.18.2.to_v.bias": 1024, "transformer.layers.18.2.to_out.0.weight": 1048576, "transformer.layers.18.2.to_out.0.bias": 1024, "transformer.layers.18.3.g": 1024, "transformer.layers.18.4.ff.0.0.weight": 4194304, "transformer.layers.18.4.ff.0.0.bias": 4096, "transformer.layers.18.4.ff.2.weight": 4194304, "transformer.layers.18.4.ff.2.bias": 1024, "transformer.layers.19.0.weight": 2097152, "transformer.layers.19.1.g": 1024, "transformer.layers.19.2.to_q.weight": 1048576, "transformer.layers.19.2.to_q.bias": 1024, "transformer.layers.19.2.to_k.weight": 1048576, "transformer.layers.19.2.to_k.bias": 1024, "transformer.layers.19.2.to_v.weight": 1048576, "transformer.layers.19.2.to_v.bias": 1024, "transformer.layers.19.2.to_out.0.weight": 1048576, "transformer.layers.19.2.to_out.0.bias": 1024, "transformer.layers.19.3.g": 1024, "transformer.layers.19.4.ff.0.0.weight": 4194304, "transformer.layers.19.4.ff.0.0.bias": 4096, "transformer.layers.19.4.ff.2.weight": 4194304, "transformer.layers.19.4.ff.2.bias": 1024, "transformer.layers.20.0.weight": 2097152, "transformer.layers.20.1.g": 1024, "transformer.layers.20.2.to_q.weight": 1048576, "transformer.layers.20.2.to_q.bias": 1024, "transformer.layers.20.2.to_k.weight": 1048576, "transformer.layers.20.2.to_k.bias": 1024, "transformer.layers.20.2.to_v.weight": 1048576, "transformer.layers.20.2.to_v.bias": 1024, "transformer.layers.20.2.to_out.0.weight": 1048576, "transformer.layers.20.2.to_out.0.bias": 1024, "transformer.layers.20.3.g": 1024, "transformer.layers.20.4.ff.0.0.weight": 4194304, "transformer.layers.20.4.ff.0.0.bias": 4096, "transformer.layers.20.4.ff.2.weight": 4194304, "transformer.layers.20.4.ff.2.bias": 1024, "transformer.layers.21.0.weight": 2097152, "transformer.layers.21.1.g": 1024, "transformer.layers.21.2.to_q.weight": 1048576, "transformer.layers.21.2.to_q.bias": 1024, "transformer.layers.21.2.to_k.weight": 1048576, "transformer.layers.21.2.to_k.bias": 1024, "transformer.layers.21.2.to_v.weight": 1048576, "transformer.layers.21.2.to_v.bias": 1024, "transformer.layers.21.2.to_out.0.weight": 1048576, "transformer.layers.21.2.to_out.0.bias": 1024, "transformer.layers.21.3.g": 1024, "transformer.layers.21.4.ff.0.0.weight": 4194304, "transformer.layers.21.4.ff.0.0.bias": 4096, "transformer.layers.21.4.ff.2.weight": 4194304, "transformer.layers.21.4.ff.2.bias": 1024, "transformer.layers.22.0.weight": 2097152, "transformer.layers.22.1.g": 1024, "transformer.layers.22.2.to_q.weight": 1048576, "transformer.layers.22.2.to_q.bias": 1024, "transformer.layers.22.2.to_k.weight": 1048576, "transformer.layers.22.2.to_k.bias": 1024, "transformer.layers.22.2.to_v.weight": 1048576, "transformer.layers.22.2.to_v.bias": 1024, "transformer.layers.22.2.to_out.0.weight": 1048576, "transformer.layers.22.2.to_out.0.bias": 1024, "transformer.layers.22.3.g": 1024, "transformer.layers.22.4.ff.0.0.weight": 4194304, "transformer.layers.22.4.ff.0.0.bias": 4096, "transformer.layers.22.4.ff.2.weight": 4194304, "transformer.layers.22.4.ff.2.bias": 1024, "transformer.layers.23.0.weight": 2097152, "transformer.layers.23.1.g": 1024, "transformer.layers.23.2.to_q.weight": 1048576, "transformer.layers.23.2.to_q.bias": 1024, "transformer.layers.23.2.to_k.weight": 1048576, "transformer.layers.23.2.to_k.bias": 1024, "transformer.layers.23.2.to_v.weight": 1048576, "transformer.layers.23.2.to_v.bias": 1024, "transformer.layers.23.2.to_out.0.weight": 1048576, "transformer.layers.23.2.to_out.0.bias": 1024, "transformer.layers.23.3.g": 1024, "transformer.layers.23.4.ff.0.0.weight": 4194304, "transformer.layers.23.4.ff.0.0.bias": 4096, "transformer.layers.23.4.ff.2.weight": 4194304, "transformer.layers.23.4.ff.2.bias": 1024, "transformer.layers.24.0.weight": 2097152, "transformer.layers.24.1.g": 1024, "transformer.layers.24.2.to_q.weight": 1048576, "transformer.layers.24.2.to_q.bias": 1024, "transformer.layers.24.2.to_k.weight": 1048576, "transformer.layers.24.2.to_k.bias": 1024, "transformer.layers.24.2.to_v.weight": 1048576, "transformer.layers.24.2.to_v.bias": 1024, "transformer.layers.24.2.to_out.0.weight": 1048576, "transformer.layers.24.2.to_out.0.bias": 1024, "transformer.layers.24.3.g": 1024, "transformer.layers.24.4.ff.0.0.weight": 4194304, "transformer.layers.24.4.ff.0.0.bias": 4096, "transformer.layers.24.4.ff.2.weight": 4194304, "transformer.layers.24.4.ff.2.bias": 1024, "transformer.layers.25.0.weight": 2097152, "transformer.layers.25.1.g": 1024, "transformer.layers.25.2.to_q.weight": 1048576, "transformer.layers.25.2.to_q.bias": 1024, "transformer.layers.25.2.to_k.weight": 1048576, "transformer.layers.25.2.to_k.bias": 1024, "transformer.layers.25.2.to_v.weight": 1048576, "transformer.layers.25.2.to_v.bias": 1024, "transformer.layers.25.2.to_out.0.weight": 1048576, "transformer.layers.25.2.to_out.0.bias": 1024, "transformer.layers.25.3.g": 1024, "transformer.layers.25.4.ff.0.0.weight": 4194304, "transformer.layers.25.4.ff.0.0.bias": 4096, "transformer.layers.25.4.ff.2.weight": 4194304, "transformer.layers.25.4.ff.2.bias": 1024, "transformer.norm_out.g": 1024, "transformer.proj_out.weight": 102400, "transformer.proj_out.bias": 100 }, "important_layers": [ "transformer.time_embed.time_mlp.0.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.text_embed.text_embed.weight", "transformer.input_embed.proj.weight", "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight", "transformer.layers.1.2.to_v.weight", "transformer.layers.1.2.to_out.0.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.2.to_q.weight", "transformer.layers.2.2.to_k.weight", "transformer.layers.2.2.to_v.weight", "transformer.layers.2.2.to_out.0.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.2.to_q.weight", "transformer.layers.3.2.to_k.weight", "transformer.layers.3.2.to_v.weight", "transformer.layers.3.2.to_out.0.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.2.to_q.weight", "transformer.layers.4.2.to_k.weight", "transformer.layers.4.2.to_v.weight", "transformer.layers.4.2.to_out.0.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.2.to_q.weight", "transformer.layers.5.2.to_k.weight", "transformer.layers.5.2.to_v.weight", "transformer.layers.5.2.to_out.0.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.2.to_q.weight", "transformer.layers.6.2.to_k.weight", "transformer.layers.6.2.to_v.weight", "transformer.layers.6.2.to_out.0.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.2.to_q.weight", "transformer.layers.7.2.to_k.weight", "transformer.layers.7.2.to_v.weight", "transformer.layers.7.2.to_out.0.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.11.4.ff.2.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.0.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.14.4.ff.2.weight", "transformer.layers.15.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight" ], "bottleneck_layers": [], "attention_layers": [], "projection_layers": [ "transformer.input_embed.proj.weight", "transformer.input_embed.proj.bias", "transformer.proj_out.weight", "transformer.proj_out.bias" ], "recommendations": { "critical_layers": { "layers": [ "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.time_embed.time_mlp.0.weight", "transformer.text_embed.text_embed.weight", "transformer.time_embed.time_mlp.0.bias", "transformer.time_embed.time_mlp.2.bias", "transformer.input_embed.proj.weight", "transformer.proj_out.weight", "transformer.input_embed.proj.bias", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.15.0.weight", "transformer.layers.16.0.weight", "transformer.layers.17.0.weight", "transformer.layers.18.0.weight", "transformer.layers.19.0.weight", "transformer.layers.20.0.weight", "transformer.layers.21.0.weight", "transformer.layers.22.0.weight", "transformer.layers.23.0.weight", "transformer.layers.24.0.weight", "transformer.layers.25.0.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight" ], "strategy": "targeted_enhancement", "enhancement_factor": 1.3 } }, "layer_connectivity": {}, "parameter_statistics": {}, "high_importance_layers": [ "transformer.input_embed.conv_pos_embed.conv1d.0.weight", "transformer.input_embed.conv_pos_embed.conv1d.2.weight", "transformer.time_embed.time_mlp.2.weight", "transformer.time_embed.time_mlp.0.weight", "transformer.text_embed.text_embed.weight", "transformer.time_embed.time_mlp.0.bias", "transformer.time_embed.time_mlp.2.bias", "transformer.input_embed.proj.weight", "transformer.proj_out.weight", "transformer.input_embed.proj.bias", "transformer.layers.0.4.ff.0.0.weight", "transformer.layers.0.4.ff.2.weight", "transformer.layers.1.4.ff.0.0.weight", "transformer.layers.1.4.ff.2.weight", "transformer.layers.2.4.ff.0.0.weight", "transformer.layers.2.4.ff.2.weight", "transformer.layers.3.4.ff.0.0.weight", "transformer.layers.3.4.ff.2.weight", "transformer.layers.4.4.ff.0.0.weight", "transformer.layers.4.4.ff.2.weight", "transformer.layers.5.4.ff.0.0.weight", "transformer.layers.5.4.ff.2.weight", "transformer.layers.6.4.ff.0.0.weight", "transformer.layers.6.4.ff.2.weight", "transformer.layers.7.4.ff.0.0.weight", "transformer.layers.7.4.ff.2.weight", "transformer.layers.8.4.ff.0.0.weight", "transformer.layers.8.4.ff.2.weight", "transformer.layers.9.4.ff.0.0.weight", "transformer.layers.9.4.ff.2.weight", "transformer.layers.10.4.ff.0.0.weight", "transformer.layers.10.4.ff.2.weight", "transformer.layers.11.4.ff.0.0.weight", "transformer.layers.12.4.ff.0.0.weight", "transformer.layers.12.4.ff.2.weight", "transformer.layers.13.4.ff.0.0.weight", "transformer.layers.13.4.ff.2.weight", "transformer.layers.14.4.ff.0.0.weight", "transformer.layers.15.4.ff.0.0.weight", "transformer.layers.15.4.ff.2.weight", "transformer.layers.16.4.ff.0.0.weight", "transformer.layers.16.4.ff.2.weight", "transformer.layers.17.4.ff.0.0.weight", "transformer.layers.17.4.ff.2.weight", "transformer.layers.18.4.ff.0.0.weight", "transformer.layers.18.4.ff.2.weight", "transformer.layers.19.4.ff.0.0.weight", "transformer.layers.19.4.ff.2.weight", "transformer.layers.20.4.ff.0.0.weight", "transformer.layers.20.4.ff.2.weight", "transformer.layers.21.4.ff.0.0.weight", "transformer.layers.21.4.ff.2.weight", "transformer.layers.22.4.ff.0.0.weight", "transformer.layers.22.4.ff.2.weight", "transformer.layers.23.4.ff.0.0.weight", "transformer.layers.23.4.ff.2.weight", "transformer.layers.24.4.ff.0.0.weight", "transformer.layers.24.4.ff.2.weight", "transformer.layers.25.4.ff.0.0.weight", "transformer.layers.25.4.ff.2.weight", "transformer.layers.13.0.weight", "transformer.layers.15.0.weight", "transformer.layers.16.0.weight", "transformer.layers.17.0.weight", "transformer.layers.18.0.weight", "transformer.layers.19.0.weight", "transformer.layers.20.0.weight", "transformer.layers.21.0.weight", "transformer.layers.22.0.weight", "transformer.layers.23.0.weight", "transformer.layers.24.0.weight", "transformer.layers.25.0.weight", "transformer.layers.0.2.to_q.weight", "transformer.layers.0.2.to_k.weight", "transformer.layers.0.2.to_v.weight", "transformer.layers.0.2.to_out.0.weight", "transformer.layers.1.2.to_q.weight", "transformer.layers.1.2.to_k.weight" ], "total_parameters": 391, "total_elements": 360755948, "param_ranges": { "transformer.time_embed.time_mlp.0.weight": { "min": -0.43014463782310486, "max": 0.2980782687664032, "mean": -0.002543725073337555, "std": 0.04256265610456467, "abs_mean": 0.03249503672122955, "sparsity": 0.0, "shape": [ 1024, 256 ], "norm": 21.830894470214844, "elements": 262144, "histogram": { "counts": [ 5, 10, 23, 43, 101, 144, 198, 176, 132, 71, 49, 23, 8, 9, 3, 2, 1, 0, 1, 1 ], "bin_edges": [ -0.1300935000181198, -0.11207325011491776, -0.0940530002117157, -0.07603274285793304, -0.05801249295473099, -0.03999224305152893, -0.021971985697746277, -0.003951743245124817, 0.014068514108657837, 0.03208877146244049, 0.05010901391506195, 0.0681292712688446, 0.08614952862262726, 0.10416977107524872, 0.12219001352787018, 0.14021028578281403, 0.15823052823543549, 0.17625077068805695, 0.1942710429430008, 0.21229128539562225, 0.23031151294708252 ] } }, "transformer.time_embed.time_mlp.0.bias": { "min": -0.0628998726606369, "max": 0.1072736531496048, "mean": 0.0006290247547440231, "std": 0.034041259437799454, "abs_mean": 0.027421049773693085, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.0889742374420166, "elements": 1024, "histogram": { "counts": [ 26, 60, 59, 79, 70, 83, 106, 99, 92, 89, 51, 40, 47, 36, 21, 13, 17, 7, 2, 3 ], "bin_edges": [ -0.0628998726606369, -0.054391197860240936, -0.04588251933455467, -0.03737384080886841, -0.028865166008472443, -0.020356491208076477, -0.011847812682390213, -0.003339134156703949, 0.005169540643692017, 0.013678215444087982, 0.022186890244483948, 0.03069557249546051, 0.039204247295856476, 0.04771292209625244, 0.056221604347229004, 0.06473027169704437, 0.07323895394802094, 0.0817476361989975, 0.09025630354881287, 0.09876498579978943, 0.1072736531496048 ] } }, "transformer.time_embed.time_mlp.2.weight": { "min": -0.41270536184310913, "max": 0.8369129300117493, "mean": -0.00020170127390883863, "std": 0.024111710488796234, "abs_mean": 0.01558289397507906, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 24.69097328186035, "elements": 1048576, "histogram": { "counts": [ 3, 7, 7, 2, 25, 63, 149, 224, 209, 172, 82, 35, 10, 4, 3, 2, 0, 0, 1, 2 ], "bin_edges": [ -0.08513874560594559, -0.07475044578313828, -0.06436215341091156, -0.05397385358810425, -0.043585557490587234, -0.03319726139307022, -0.02280896157026291, -0.012420669198036194, -0.002032369375228882, 0.00835593044757843, 0.018744222819805145, 0.029132522642612457, 0.03952082246541977, 0.04990912228822708, 0.0602974072098732, 0.07068570703268051, 0.08107400685548782, 0.09146230667829514, 0.10185060650110245, 0.11223889142274857, 0.12262718379497528 ] } }, "transformer.time_embed.time_mlp.2.bias": { "min": -0.11501855403184891, "max": 0.3208469748497009, "mean": -0.0009418133413419127, "std": 0.019536493346095085, "abs_mean": 0.01235988549888134, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.6255888938903809, "elements": 1024, "histogram": { "counts": [ 1, 2, 3, 13, 370, 489, 115, 3, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1 ], "bin_edges": [ -0.11501855403184891, -0.09322527796030045, -0.07143200188875198, -0.04963872581720352, -0.02784544974565506, -0.006052173674106598, 0.015741102397441864, 0.037534378468990326, 0.05932765454053879, 0.08112093061208725, 0.10291420668363571, 0.12470748275518417, 0.14650076627731323, 0.1682940423488617, 0.19008731842041016, 0.21188059449195862, 0.23367387056350708, 0.25546714663505554, 0.277260422706604, 0.29905369877815247, 0.3208469748497009 ] } }, "transformer.text_embed.text_embed.weight": { "min": -2.7886247634887695, "max": 2.8676700592041016, "mean": -0.0003673351602628827, "std": 0.6154847145080566, "abs_mean": 0.4908738136291504, "sparsity": 0.0, "shape": [ 2546, 100 ], "norm": 310.559814453125, "elements": 254600, "histogram": { "counts": [ 2, 0, 5, 18, 33, 67, 90, 126, 130, 175, 125, 93, 50, 46, 21, 10, 6, 2, 0, 1 ], "bin_edges": [ -2.093568801879883, -1.8628169298171997, -1.6320650577545166, -1.401313304901123, -1.17056143283844, -0.9398095607757568, -0.7090578079223633, -0.4783059358596802, -0.24755406379699707, -0.016802310943603516, 0.21394968032836914, 0.4447014331817627, 0.6754531860351562, 0.9062051773071289, 1.1369569301605225, 1.3677089214324951, 1.5984606742858887, 1.8292124271392822, 2.059964179992676, 2.2907161712646484, 2.5214684009552 ] } }, "transformer.input_embed.proj.weight": { "min": -0.27889013290405273, "max": 0.38151732087135315, "mean": 0.0004236791573930532, "std": 0.04274853691458702, "abs_mean": 0.032939568161964417, "sparsity": 0.0, "shape": [ 1024, 300 ], "norm": 23.69471549987793, "elements": 307200, "histogram": { "counts": [ 2, 1, 4, 3, 9, 17, 37, 59, 96, 122, 139, 149, 149, 83, 59, 36, 22, 8, 3, 2 ], "bin_edges": [ -0.1740008443593979, -0.15822970867156982, -0.14245855808258057, -0.1266874223947525, -0.11091627925634384, -0.09514513611793518, -0.07937400043010712, -0.06360285729169846, -0.047831714153289795, -0.03206057846546173, -0.016289427876472473, -0.0005182921886444092, 0.015252843499183655, 0.031023994088172913, 0.04679512977600098, 0.06256628036499023, 0.0783374160528183, 0.09410856664180756, 0.10987968742847443, 0.12565083801746368, 0.14142198860645294 ] } }, "transformer.input_embed.proj.bias": { "min": -0.2219879925251007, "max": 0.2091645449399948, "mean": -0.004480332136154175, "std": 0.040872007608413696, "abs_mean": 0.03087138757109642, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.3151038885116577, "elements": 1024, "histogram": { "counts": [ 1, 2, 0, 5, 2, 12, 19, 65, 158, 197, 228, 197, 81, 17, 8, 1, 3, 1, 2, 1 ], "bin_edges": [ -0.2219879925251007, -0.20043036341667175, -0.1788727343082428, -0.15731512010097504, -0.13575749099254608, -0.11419986188411713, -0.09264224767684937, -0.07108461856842041, -0.049526989459991455, -0.0279693603515625, -0.006411731243133545, 0.015145882964134216, 0.03670349717140198, 0.05826112627983093, 0.07981875538825989, 0.10137638449668884, 0.1229340136051178, 0.14449164271354675, 0.1660492718219757, 0.18760690093040466, 0.2091645449399948 ] } }, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": { "min": -0.42831405997276306, "max": 0.47610175609588623, "mean": 3.7659003737644525e-06, "std": 0.024510981515049934, "abs_mean": 0.018337251618504524, "sparsity": 0.0, "shape": [ 1024, 64, 31 ], "norm": 34.935028076171875, "elements": 2031616, "histogram": { "counts": [ 2, 2, 6, 8, 19, 34, 59, 107, 163, 177, 135, 120, 73, 45, 27, 12, 6, 2, 1, 2 ], "bin_edges": [ -0.08790014684200287, -0.07873495668172836, -0.06956977397203445, -0.06040458381175995, -0.05123939737677574, -0.042074210941791534, -0.03290902078151703, -0.02374383807182312, -0.014578647911548615, -0.005413457751274109, 0.0037517249584198, 0.012916915118694305, 0.02208210527896881, 0.03124728798866272, 0.04041247069835663, 0.04957766830921173, 0.05874285101890564, 0.06790803372859955, 0.07707323133945465, 0.08623841404914856, 0.09540360420942307 ] } }, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": { "min": -0.3244315981864929, "max": 0.15647757053375244, "mean": -0.046661682426929474, "std": 0.05150889977812767, "abs_mean": 0.054327093064785004, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.223456382751465, "elements": 1024, "histogram": { "counts": [ 1, 1, 0, 1, 1, 1, 13, 28, 79, 113, 144, 147, 208, 162, 73, 18, 4, 3, 1, 2 ], "bin_edges": [ -0.3244315981864929, -0.30038613080978394, -0.27634069323539734, -0.25229522585868835, -0.22824975848197937, -0.20420430600643158, -0.1801588535308838, -0.1561133861541748, -0.13206793367862701, -0.10802248120307922, -0.08397701382637024, -0.059931546449661255, -0.03588610887527466, -0.011840641498565674, 0.01220482587814331, 0.03625026345252991, 0.06029573082923889, 0.08434119820594788, 0.10838663578033447, 0.13243210315704346, 0.15647757053375244 ] } }, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": { "min": -0.4104415476322174, "max": 0.3546721041202545, "mean": -0.00013054230657871813, "std": 0.02360478602349758, "abs_mean": 0.017416419461369514, "sparsity": 0.0, "shape": [ 1024, 64, 31 ], "norm": 33.64410400390625, "elements": 2031616, "histogram": { "counts": [ 2, 2, 2, 11, 14, 36, 81, 138, 248, 214, 123, 75, 29, 14, 6, 2, 1, 1, 0, 1 ], "bin_edges": [ -0.10315045714378357, -0.09150389581918716, -0.07985734194517136, -0.06821078062057495, -0.056564223021268845, -0.04491766542196274, -0.03327110409736633, -0.021624550223350525, -0.00997798889875412, 0.0016685724258422852, 0.013315126299858093, 0.0249616801738739, 0.0366082489490509, 0.04825480282306671, 0.05990135669708252, 0.07154792547225952, 0.08319447934627533, 0.09484103322029114, 0.10648760199546814, 0.11813415586948395, 0.12978070974349976 ] } }, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": { "min": -0.22924789786338806, "max": 0.2620227038860321, "mean": -0.029105938971042633, "std": 0.04928705468773842, "abs_mean": 0.042650409042835236, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.8310034275054932, "elements": 1024, "histogram": { "counts": [ 4, 4, 9, 14, 35, 76, 106, 172, 216, 218, 100, 36, 7, 2, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.22924789786338806, -0.20468436181545258, -0.18012084066867828, -0.1555573046207428, -0.1309937834739685, -0.10643024742603302, -0.08186671137809753, -0.05730319023132324, -0.032739654183387756, -0.00817611813545227, 0.01638740301132202, 0.040950924158096313, 0.065514475107193, 0.09007799625396729, 0.11464151740074158, 0.13920506834983826, 0.16376858949661255, 0.18833211064338684, 0.21289566159248352, 0.2374591827392578, 0.2620227038860321 ] } }, "transformer.layers.0.1.g": { "min": 0.2546031177043915, "max": 0.8185229301452637, "mean": 0.5252923965454102, "std": 0.08049347996711731, "abs_mean": 0.5252923965454102, "sparsity": 0.0, "shape": [ 1024 ], "norm": 17.005373001098633, "elements": 1024, "histogram": { "counts": [ 1, 2, 5, 14, 21, 66, 66, 100, 146, 167, 140, 83, 75, 53, 31, 13, 5, 6, 2, 4 ], "bin_edges": [ 0.2546031177043915, 0.28279909491539, 0.31099510192871094, 0.3391910791397095, 0.367387056350708, 0.39558306336402893, 0.42377904057502747, 0.4519750475883484, 0.4801710247993469, 0.5083670020103455, 0.5365630388259888, 0.5647590160369873, 0.5929549932479858, 0.6211509704589844, 0.6493469476699829, 0.6775429248809814, 0.70573890209198, 0.7339348793029785, 0.762130856513977, 0.7903269529342651, 0.8185229301452637 ] } }, "transformer.layers.0.2.to_q.weight": { "min": -0.296941339969635, "max": 0.2655627429485321, "mean": -0.0004258690751157701, "std": 0.03210259974002838, "abs_mean": 0.024999314919114113, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.87555694580078, "elements": 1048576, "histogram": { "counts": [ 1, 4, 5, 13, 29, 41, 80, 134, 166, 162, 131, 88, 69, 36, 14, 20, 3, 2, 0, 2 ], "bin_edges": [ -0.11262407153844833, -0.10048417001962662, -0.0883442685008049, -0.07620436698198318, -0.06406446546316147, -0.05192456394433975, -0.039784662425518036, -0.02764476090669632, -0.015504859387874603, -0.003364957869052887, 0.00877494364976883, 0.020914845168590546, 0.03305474668741226, 0.04519464820623398, 0.057334549725055695, 0.06947445124387741, 0.08161435276269913, 0.09375425428152084, 0.10589415580034256, 0.11803405731916428, 0.1301739513874054 ] } }, "transformer.layers.0.2.to_q.bias": { "min": -0.09266690164804459, "max": 0.12469176203012466, "mean": 0.0006477286806330085, "std": 0.025720255449414253, "abs_mean": 0.019480330869555473, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8229072690010071, "elements": 1024, "histogram": { "counts": [ 1, 3, 12, 16, 26, 53, 109, 175, 195, 161, 112, 70, 34, 12, 9, 8, 0, 3, 0, 1 ], "bin_edges": [ -0.09266690164804459, -0.0817989706993103, -0.07093103229999542, -0.06006310135126114, -0.049195170402526855, -0.03832723945379257, -0.02745930105447769, -0.016591370105743408, -0.005723439157009125, 0.005144491791725159, 0.016012422740459442, 0.026880361139774323, 0.0377482995390892, 0.04861622303724289, 0.05948416143655777, 0.07035208493471146, 0.08122002333402634, 0.09208796173334122, 0.1029558852314949, 0.11382382363080978, 0.12469176203012466 ] } }, "transformer.layers.0.2.to_k.weight": { "min": -0.2905982434749603, "max": 0.28104421496391296, "mean": -7.510318391723558e-05, "std": 0.03093179315328598, "abs_mean": 0.023867137730121613, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 31.673906326293945, "elements": 1048576, "histogram": { "counts": [ 3, 4, 11, 12, 21, 53, 86, 111, 131, 143, 141, 111, 80, 36, 33, 13, 5, 2, 3, 1 ], "bin_edges": [ -0.1047215387225151, -0.09359753876924515, -0.08247353136539459, -0.07134953141212463, -0.060225531458854675, -0.04910153150558472, -0.03797752410173416, -0.026853524148464203, -0.015729524195194244, -0.004605524241924286, 0.006518475711345673, 0.017642483115196228, 0.028766490519046783, 0.03989049047231674, 0.0510144904255867, 0.06213849037885666, 0.07326249033212662, 0.08438649028539658, 0.09551049023866653, 0.10663449019193649, 0.11775848269462585 ] } }, "transformer.layers.0.2.to_k.bias": { "min": -5.890929698944092, "max": 5.805842876434326, "mean": -0.009318170137703419, "std": 1.2943130731582642, "abs_mean": 0.8134283423423767, "sparsity": 0.0, "shape": [ 1024 ], "norm": 41.39886474609375, "elements": 1024, "histogram": { "counts": [ 4, 9, 4, 3, 2, 3, 31, 44, 105, 260, 304, 120, 55, 30, 7, 4, 0, 4, 5, 6 ], "bin_edges": [ -5.890929698944092, -5.30609130859375, -4.72125244140625, -4.13641357421875, -3.551575183868408, -2.9667365550994873, -2.3818979263305664, -1.7970595359802246, -1.2122206687927246, -0.6273818016052246, -0.04254341125488281, 0.542294979095459, 1.127133846282959, 1.711972713470459, 2.2968106269836426, 2.8816494941711426, 3.4664883613586426, 4.051327228546143, 4.636166095733643, 5.221004009246826, 5.805842876434326 ] } }, "transformer.layers.0.2.to_v.weight": { "min": -0.42498156428337097, "max": 0.3436700105667114, "mean": 9.804974979488179e-05, "std": 0.029953550547361374, "abs_mean": 0.021628363057971, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 30.67213249206543, "elements": 1048576, "histogram": { "counts": [ 2, 10, 13, 26, 45, 154, 271, 282, 115, 54, 21, 4, 2, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.1268398016691208, -0.10867056995630264, -0.0905013382434845, -0.07233210653066635, -0.054162874817848206, -0.03599364310503006, -0.017824411392211914, 0.0003448277711868286, 0.018514052033424377, 0.036683276295661926, 0.05485251545906067, 0.07302175462245941, 0.09119097888469696, 0.10936020314693451, 0.12752945721149445, 0.145698681473732, 0.16386790573596954, 0.1820371299982071, 0.20020635426044464, 0.21837560832500458, 0.23654483258724213 ] } }, "transformer.layers.0.2.to_v.bias": { "min": -0.029002565890550613, "max": 0.027599314227700233, "mean": -0.0003237572673242539, "std": 0.01257046777755022, "abs_mean": 0.010674269869923592, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4021919369697571, "elements": 1024, "histogram": { "counts": [ 2, 18, 26, 44, 55, 60, 72, 89, 62, 66, 62, 59, 83, 80, 69, 51, 40, 31, 16, 15 ], "bin_edges": [ -0.029002565890550613, -0.026172472164034843, -0.023342378437519073, -0.020512282848358154, -0.017682189121842384, -0.014852095395326614, -0.012021999806165695, -0.009191906079649925, -0.006361812353134155, -0.0035317186266183853, -0.0007016249001026154, 0.002128470689058304, 0.004958566278219223, 0.007788658142089844, 0.010618753731250763, 0.013448845595121384, 0.016278941184282303, 0.019109036773443222, 0.021939128637313843, 0.024769224226474762, 0.027599314227700233 ] } }, "transformer.layers.0.2.to_out.0.weight": { "min": -0.45393431186676025, "max": 0.44807320833206177, "mean": 2.389570181549061e-05, "std": 0.023853935301303864, "abs_mean": 0.01586836948990822, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 24.425954818725586, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 0, 2, 2, 3, 6, 13, 59, 227, 427, 185, 49, 16, 6, 3, 0, 0, 1 ], "bin_edges": [ -0.2009379267692566, -0.1832694113254547, -0.16560088098049164, -0.14793236553668976, -0.13026383519172668, -0.1125953197479248, -0.09492680430412292, -0.07725828140974045, -0.05958975851535797, -0.04192124307155609, -0.024252712726593018, -0.006584197282791138, 0.011084318161010742, 0.028752848505973816, 0.046421363949775696, 0.06408989429473877, 0.08175840973854065, 0.09942692518234253, 0.11709544062614441, 0.13476398587226868, 0.15243251621723175 ] } }, "transformer.layers.0.2.to_out.0.bias": { "min": -0.0885927751660347, "max": 0.09089276939630508, "mean": 0.0022863608319312334, "std": 0.019503755494952202, "abs_mean": 0.01498686708509922, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.6280912160873413, "elements": 1024, "histogram": { "counts": [ 2, 1, 4, 3, 4, 12, 27, 83, 144, 204, 181, 162, 85, 49, 21, 13, 4, 0, 0, 1 ], "bin_edges": [ -0.0885927751660347, -0.07961849868297577, -0.07064422219991684, -0.06166994199156761, -0.05269566550850868, -0.04372138902544975, -0.034747108817100525, -0.025772832334041595, -0.016798555850982666, -0.007824279367923737, 0.0011499971151351929, 0.010124273598194122, 0.01909855753183365, 0.028072834014892578, 0.03704711049795151, 0.04602137953042984, 0.054995663464069366, 0.06396994739770889, 0.07294421643018723, 0.08191850036382675, 0.09089276939630508 ] } }, "transformer.layers.0.3.g": { "min": 0.2667909264564514, "max": 1.0541586875915527, "mean": 0.5309650301933289, "std": 0.10402658581733704, "abs_mean": 0.5309650301933289, "sparsity": 0.0, "shape": [ 1024 ], "norm": 17.31359100341797, "elements": 1024, "histogram": { "counts": [ 2, 3, 35, 111, 141, 159, 163, 105, 77, 78, 60, 32, 13, 13, 2, 2, 0, 3, 0, 1 ], "bin_edges": [ 0.2667909264564514, 0.3061593174934387, 0.345527708530426, 0.38489609956741333, 0.42426449060440063, 0.46363288164138794, 0.5030012130737305, 0.5423696041107178, 0.5817379951477051, 0.6211063861846924, 0.6604747772216797, 0.699843168258667, 0.7392115592956543, 0.7785799503326416, 0.8179483413696289, 0.8573167324066162, 0.8966851234436035, 0.9360535144805908, 0.9754219055175781, 1.0147902965545654, 1.0541586875915527 ] } }, "transformer.layers.0.4.ff.0.0.weight": { "min": -0.5743634104728699, "max": 0.6081749796867371, "mean": -0.0004296167753636837, "std": 0.03860084339976311, "abs_mean": 0.02932225726544857, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 79.05066680908203, "elements": 4194304, "histogram": { "counts": [ 2, 2, 2, 5, 13, 20, 48, 103, 147, 198, 181, 143, 76, 38, 14, 3, 4, 0, 0, 1 ], "bin_edges": [ -0.16866450011730194, -0.15148140490055084, -0.13429829478263855, -0.11711519956588745, -0.09993210434913635, -0.08274900913238525, -0.06556590646505356, -0.04838280379772186, -0.031199708580970764, -0.014016613364219666, 0.003166481852531433, 0.020349591970443726, 0.037532687187194824, 0.05471578240394592, 0.07189889252185822, 0.08908198773860931, 0.10626508295536041, 0.12344817817211151, 0.1406312733888626, 0.1578143686056137, 0.1749974638223648 ] } }, "transformer.layers.0.4.ff.0.0.bias": { "min": -0.18247899413108826, "max": 0.04562002047896385, "mean": -0.029428046196699142, "std": 0.04256246238946915, "abs_mean": 0.03553260490298271, "sparsity": 0.0, "shape": [ 4096 ], "norm": 3.3114237785339355, "elements": 4096, "histogram": { "counts": [ 1, 3, 6, 9, 17, 31, 42, 43, 40, 38, 28, 32, 48, 80, 143, 181, 157, 80, 20, 1 ], "bin_edges": [ -0.17975062131881714, -0.16848209500312805, -0.15721355378627777, -0.14594502747058868, -0.1346764862537384, -0.12340795993804932, -0.11213943362236023, -0.10087089985609055, -0.08960236608982086, -0.07833383232355118, -0.0670652985572815, -0.05579677224159241, -0.04452824592590332, -0.03325970470905304, -0.021991178393363953, -0.010722637176513672, 0.000545889139175415, 0.011814415454864502, 0.023082956671714783, 0.03435148298740387, 0.04562002047896385 ] } }, "transformer.layers.0.4.ff.2.weight": { "min": -1.1666945219039917, "max": 1.633580207824707, "mean": 0.00032344614737667143, "std": 0.027696726843714714, "abs_mean": 0.02006993629038334, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 56.71977615356445, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 1, 39, 262, 490, 188, 14, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.2090015709400177, -0.176393523812294, -0.1437854766845703, -0.11117742955684662, -0.07856938242912292, -0.04596133530139923, -0.013353288173675537, 0.019254758954048157, 0.05186280608177185, 0.08447083830833435, 0.11707890033721924, 0.14968696236610413, 0.18229499459266663, 0.21490302681922913, 0.247511088848114, 0.2801191508769989, 0.3127271831035614, 0.3453352153301239, 0.3779432475566864, 0.4105513393878937, 0.4431593716144562 ] } }, "transformer.layers.0.4.ff.2.bias": { "min": -0.16206279397010803, "max": 0.20534056425094604, "mean": -0.02111881598830223, "std": 0.027917111292481422, "abs_mean": 0.027663614600896835, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.1198209524154663, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 7, 19, 84, 198, 278, 259, 99, 37, 10, 3, 0, 0, 2, 0, 0, 0, 1 ], "bin_edges": [ -0.16206279397010803, -0.14369262754917145, -0.12532246112823486, -0.10695228725671768, -0.0885821208357811, -0.07021195441484451, -0.05184178054332733, -0.03347161412239075, -0.015101447701454163, 0.003268718719482422, 0.021638885140419006, 0.04000905156135559, 0.05837923288345337, 0.07674939930438995, 0.09511956572532654, 0.11348971724510193, 0.1318598985671997, 0.15023007988929749, 0.16860023140907288, 0.18697041273117065, 0.20534056425094604 ] } }, "transformer.layers.1.1.g": { "min": 0.22404542565345764, "max": 0.8422443866729736, "mean": 0.4874877631664276, "std": 0.07493799924850464, "abs_mean": 0.4874877631664276, "sparsity": 0.0, "shape": [ 1024 ], "norm": 15.782669067382812, "elements": 1024, "histogram": { "counts": [ 1, 6, 9, 13, 36, 66, 116, 169, 178, 152, 118, 72, 28, 20, 8, 3, 2, 0, 0, 3 ], "bin_edges": [ 0.22404542565345764, 0.25495538115501404, 0.28586533665657043, 0.31677526235580444, 0.34768521785736084, 0.37859517335891724, 0.40950512886047363, 0.44041508436203003, 0.4713250398635864, 0.502234935760498, 0.5331449508666992, 0.5640548467636108, 0.594964861869812, 0.6258747577667236, 0.65678471326828, 0.6876946687698364, 0.7186046242713928, 0.7495145797729492, 0.7804244756698608, 0.811334490776062, 0.8422443866729736 ] } }, "transformer.layers.1.2.to_q.weight": { "min": -0.255166620016098, "max": 0.305690199136734, "mean": -6.7684013629332185e-06, "std": 0.03347513824701309, "abs_mean": 0.02612040936946869, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 34.27812957763672, "elements": 1048576, "histogram": { "counts": [ 3, 4, 6, 5, 31, 52, 79, 127, 150, 159, 135, 95, 70, 36, 27, 13, 7, 0, 0, 1 ], "bin_edges": [ -0.11421064287424088, -0.1019379198551178, -0.08966518938541412, -0.07739246636629105, -0.06511974334716797, -0.05284702032804489, -0.04057428985834122, -0.02830156683921814, -0.016028843820095062, -0.003756120800971985, 0.008516602218151093, 0.020789332687854767, 0.03306206315755844, 0.04533477872610092, 0.057607509195804596, 0.06988022476434708, 0.08215295523405075, 0.09442568570375443, 0.1066984012722969, 0.11897113174200058, 0.13124385476112366 ] } }, "transformer.layers.1.2.to_q.bias": { "min": -0.09524397552013397, "max": 0.11034096777439117, "mean": 6.5918720792979e-05, "std": 0.026950189843773842, "abs_mean": 0.021233120933175087, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8619874715805054, "elements": 1024, "histogram": { "counts": [ 4, 1, 3, 9, 24, 52, 99, 122, 165, 138, 126, 118, 63, 46, 9, 9, 7, 1, 1, 3 ], "bin_edges": [ -0.09524397552013397, -0.08496472984552383, -0.0746854841709137, -0.06440623104572296, -0.054126985371112823, -0.043847739696502686, -0.03356849029660225, -0.023289240896701813, -0.013009995222091675, -0.002730749547481537, 0.007548496127128601, 0.017827749252319336, 0.028106994926929474, 0.03838624060153961, 0.04866549372673035, 0.05894473195075989, 0.06922398507595062, 0.07950323820114136, 0.0897824764251709, 0.10006172955036163, 0.11034096777439117 ] } }, "transformer.layers.1.2.to_k.weight": { "min": -0.29684391617774963, "max": 0.295682817697525, "mean": 5.335842797649093e-05, "std": 0.03254625201225281, "abs_mean": 0.025315403938293457, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.326988220214844, "elements": 1048576, "histogram": { "counts": [ 2, 1, 5, 10, 11, 37, 78, 121, 160, 157, 154, 111, 76, 42, 20, 6, 5, 2, 1, 1 ], "bin_edges": [ -0.12574371695518494, -0.11253877729177475, -0.09933383762836456, -0.08612889796495438, -0.07292395830154419, -0.059719018638134, -0.046514078974723816, -0.03330913931131363, -0.020104199647903442, -0.006899259984493256, 0.006305679678916931, 0.019510626792907715, 0.032715559005737305, 0.045920491218566895, 0.05912543833255768, 0.07233038544654846, 0.08553531765937805, 0.09874024987220764, 0.11194519698619843, 0.1251501441001892, 0.1383550763130188 ] } }, "transformer.layers.1.2.to_k.bias": { "min": -5.156938552856445, "max": 5.0772905349731445, "mean": -0.014555896632373333, "std": 1.1561553478240967, "abs_mean": 0.6372154355049133, "sparsity": 0.0, "shape": [ 1024 ], "norm": 36.981834411621094, "elements": 1024, "histogram": { "counts": [ 6, 10, 2, 7, 8, 6, 13, 31, 72, 308, 364, 75, 45, 21, 10, 5, 0, 7, 2, 8 ], "bin_edges": [ -5.156938552856445, -4.645226955413818, -4.133515357971191, -3.6218042373657227, -3.1100926399230957, -2.5983810424804688, -2.086669683456421, -1.574958324432373, -1.063246726989746, -0.5515351295471191, -0.03982353210449219, 0.47188758850097656, 0.9835991859436035, 1.4953107833862305, 2.007021903991699, 2.518733501434326, 3.030445098876953, 3.542156219482422, 4.053868293762207, 4.565579414367676, 5.0772905349731445 ] } }, "transformer.layers.1.2.to_v.weight": { "min": -0.3448536694049835, "max": 0.34325698018074036, "mean": 7.860038749640808e-05, "std": 0.0300619974732399, "abs_mean": 0.02246847189962864, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 30.78302574157715, "elements": 1048576, "histogram": { "counts": [ 2, 2, 11, 37, 72, 152, 269, 249, 120, 58, 17, 2, 3, 2, 0, 0, 0, 1, 1, 2 ], "bin_edges": [ -0.11359164118766785, -0.09682722389698029, -0.08006280660629272, -0.06329838931560516, -0.0465339720249176, -0.02976955473423004, -0.01300513744354248, 0.0037592798471450806, 0.02052369713783264, 0.0372881144285202, 0.054052531719207764, 0.07081694900989532, 0.08758136630058289, 0.10434578359127045, 0.12111020088195801, 0.13787463307380676, 0.15463903546333313, 0.1714034378528595, 0.18816787004470825, 0.204932302236557, 0.22169671952724457 ] } }, "transformer.layers.1.2.to_v.bias": { "min": -0.03601115196943283, "max": 0.03331650421023369, "mean": -0.0001408920797985047, "std": 0.013034623116254807, "abs_mean": 0.01087227649986744, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.41692858934402466, "elements": 1024, "histogram": { "counts": [ 3, 4, 10, 26, 38, 69, 64, 76, 82, 99, 83, 79, 95, 82, 68, 61, 35, 16, 7, 3 ], "bin_edges": [ -0.03601115196943283, -0.03254476934671402, -0.02907838672399521, -0.025612004101276398, -0.022145621478557587, -0.018679238855838776, -0.015212856233119965, -0.011746473610401154, -0.008280090987682343, -0.0048137083649635315, -0.0013473257422447205, 0.0021190568804740906, 0.005585439503192902, 0.009051822125911713, 0.012518204748630524, 0.015984587371349335, 0.019450969994068146, 0.022917352616786957, 0.026383735239505768, 0.02985011786222458, 0.03331650421023369 ] } }, "transformer.layers.1.2.to_out.0.weight": { "min": -0.31532466411590576, "max": 0.3747538924217224, "mean": -2.0682646209024824e-05, "std": 0.024059493094682693, "abs_mean": 0.017171449959278107, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 24.636526107788086, "elements": 1048576, "histogram": { "counts": [ 2, 0, 2, 0, 4, 0, 8, 16, 50, 125, 272, 278, 150, 57, 24, 6, 3, 0, 2, 1 ], "bin_edges": [ -0.15282730758190155, -0.13901641964912415, -0.12520551681518555, -0.11139462888240814, -0.09758373349905014, -0.08377283811569214, -0.06996195018291473, -0.05615105479955673, -0.04234015941619873, -0.02852926403284073, -0.014718368649482727, -0.0009074807167053223, 0.012903407216072083, 0.02671431005001068, 0.040525197982788086, 0.054336100816726685, 0.06814698874950409, 0.0819578766822815, 0.09576877951622009, 0.1095796674489975, 0.1233905628323555 ] } }, "transformer.layers.1.2.to_out.0.bias": { "min": -0.10526668280363083, "max": 0.12198653072118759, "mean": -0.001968209631741047, "std": 0.0288400761783123, "abs_mean": 0.022421324625611305, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.924579381942749, "elements": 1024, "histogram": { "counts": [ 1, 1, 4, 17, 23, 48, 103, 133, 171, 146, 154, 83, 56, 26, 17, 4, 8, 3, 1, 1 ], "bin_edges": [ -0.10526668280363083, -0.09390401840209961, -0.08254136145114899, -0.07117870450019836, -0.059816040098667145, -0.048453379422426224, -0.0370907187461853, -0.025728054344654083, -0.01436539739370346, -0.003002740442752838, 0.008359923958778381, 0.0197225883603096, 0.031085245311260223, 0.042447902262210846, 0.05381057411432266, 0.06517323106527328, 0.07653588801622391, 0.08789854496717453, 0.09926120191812515, 0.11062387377023697, 0.12198653072118759 ] } }, "transformer.layers.1.3.g": { "min": 0.3114672601222992, "max": 1.1185976266860962, "mean": 0.6660763025283813, "std": 0.09736555069684982, "abs_mean": 0.6660763025283813, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.540740966796875, "elements": 1024, "histogram": { "counts": [ 4, 3, 11, 18, 20, 54, 89, 129, 211, 200, 97, 77, 47, 27, 7, 1, 4, 0, 0, 1 ], "bin_edges": [ 0.3114672601222992, 0.3518237769603729, 0.39218029379844666, 0.4325368106365204, 0.4728933274745941, 0.5132498741149902, 0.553606390953064, 0.5939629077911377, 0.6343194246292114, 0.6746759414672852, 0.7150324583053589, 0.7553889751434326, 0.7957454919815063, 0.8361020088195801, 0.8764585256576538, 0.9168150424957275, 0.9571715593338013, 0.997528076171875, 1.0378845930099487, 1.0782411098480225, 1.1185976266860962 ] } }, "transformer.layers.1.4.ff.0.0.weight": { "min": -0.872668981552124, "max": 0.6275054216384888, "mean": 0.0016755885444581509, "std": 0.04743882641196251, "abs_mean": 0.035196080803871155, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 97.20372009277344, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 0, 0, 1, 5, 6, 13, 48, 132, 221, 272, 179, 78, 22, 11, 5, 4, 2 ], "bin_edges": [ -0.33325523138046265, -0.3059375286102295, -0.27861982583999634, -0.2513021230697632, -0.22398442029953003, -0.19666671752929688, -0.16934901475906372, -0.14203131198883057, -0.11471360921859741, -0.08739590644836426, -0.060078203678131104, -0.03276050090789795, -0.005442798137664795, 0.02187490463256836, 0.049192607402801514, 0.07651031017303467, 0.10382801294326782, 0.13114571571350098, 0.15846341848373413, 0.18578112125396729, 0.21309885382652283 ] } }, "transformer.layers.1.4.ff.0.0.bias": { "min": -0.2710971236228943, "max": 0.03426326811313629, "mean": -0.0465819425880909, "std": 0.04054969921708107, "abs_mean": 0.04834957420825958, "sparsity": 0.0, "shape": [ 4096 ], "norm": 3.9523580074310303, "elements": 4096, "histogram": { "counts": [ 1, 0, 1, 5, 4, 5, 4, 8, 26, 35, 66, 77, 68, 117, 105, 126, 128, 137, 68, 19 ], "bin_edges": [ -0.2314978539943695, -0.2185397893190384, -0.20558173954486847, -0.19262367486953735, -0.17966562509536743, -0.16670756042003632, -0.1537494957447052, -0.14079144597053528, -0.12783338129520416, -0.11487532407045364, -0.10191726684570312, -0.08895920217037201, -0.0760011374950409, -0.06304308772087097, -0.050085023045539856, -0.037126973271369934, -0.02416890859603882, -0.011210843920707703, 0.0017472058534622192, 0.014705270528793335, 0.02766331285238266 ] } }, "transformer.layers.1.4.ff.2.weight": { "min": -0.922234833240509, "max": 0.9643772840499878, "mean": 0.0010214494541287422, "std": 0.04070669412612915, "abs_mean": 0.027846619486808777, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 83.38308715820312, "elements": 4194304, "histogram": { "counts": [ 1, 0, 1, 1, 2, 4, 4, 23, 79, 267, 354, 183, 50, 14, 9, 3, 1, 2, 1, 1 ], "bin_edges": [ -0.2802606523036957, -0.25296658277511597, -0.22567248344421387, -0.19837841391563416, -0.17108432948589325, -0.14379024505615234, -0.11649617552757263, -0.08920209109783173, -0.06190800666809082, -0.034613922238349915, -0.007319837808609009, 0.019974231719970703, 0.047268301248550415, 0.07456240057945251, 0.10185647010803223, 0.12915056943893433, 0.15644463896751404, 0.18373870849609375, 0.21103280782699585, 0.23832687735557556, 0.26562100648880005 ] } }, "transformer.layers.1.4.ff.2.bias": { "min": -0.14429129660129547, "max": 0.07484762370586395, "mean": -0.00908473040908575, "std": 0.025672495365142822, "abs_mean": 0.020597003400325775, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8710619211196899, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 1, 2, 2, 10, 23, 39, 71, 106, 150, 195, 182, 111, 56, 29, 15, 3, 3 ], "bin_edges": [ -0.14429129660129547, -0.13333435356616974, -0.12237740308046341, -0.11142046004533768, -0.10046350955963135, -0.08950656652450562, -0.07854962348937988, -0.06759267300367355, -0.05663572996854782, -0.04567878693342209, -0.03472183644771576, -0.023764893412590027, -0.012807950377464294, -0.001851007342338562, 0.009105950593948364, 0.020062893629074097, 0.03101983666419983, 0.04197677969932556, 0.052933722734451294, 0.06389068067073822, 0.07484762370586395 ] } }, "transformer.layers.2.1.g": { "min": 0.2402428686618805, "max": 0.711609423160553, "mean": 0.44710344076156616, "std": 0.05906940996646881, "abs_mean": 0.44710344076156616, "sparsity": 0.0, "shape": [ 1024 ], "norm": 14.431511878967285, "elements": 1024, "histogram": { "counts": [ 1, 1, 8, 19, 24, 59, 107, 162, 181, 162, 113, 69, 43, 23, 10, 10, 2, 4, 1, 1 ], "bin_edges": [ 0.2402428686618805, 0.2638112008571625, 0.28737953305244446, 0.31094783544540405, 0.33451616764068604, 0.358084499835968, 0.38165283203125, 0.405221164226532, 0.42878949642181396, 0.45235782861709595, 0.47592616081237793, 0.4994944930076599, 0.5230628252029419, 0.5466310977935791, 0.5701994895935059, 0.5937677621841431, 0.6173361539840698, 0.640904426574707, 0.6644728183746338, 0.688041090965271, 0.711609423160553 ] } }, "transformer.layers.2.2.to_q.weight": { "min": -0.27207210659980774, "max": 0.29753801226615906, "mean": 9.350538675789721e-06, "std": 0.035469669848680496, "abs_mean": 0.027727492153644562, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 36.32040023803711, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 6, 10, 15, 32, 57, 94, 124, 148, 143, 143, 90, 66, 37, 23, 7, 1, 3 ], "bin_edges": [ -0.14873576164245605, -0.13538120687007904, -0.12202665209770203, -0.10867208987474442, -0.0953175351023674, -0.08196298032999039, -0.06860841810703278, -0.05525386333465576, -0.04189930856227875, -0.028544753789901733, -0.01519019901752472, -0.001835644245147705, 0.011518925428390503, 0.024873480200767517, 0.03822803497314453, 0.051582589745521545, 0.06493714451789856, 0.07829169929027557, 0.09164625406265259, 0.1050008237361908, 0.11835535615682602 ] } }, "transformer.layers.2.2.to_q.bias": { "min": -0.11918215453624725, "max": 0.1183757483959198, "mean": 0.0007599537493661046, "std": 0.027609599754214287, "abs_mean": 0.020609423518180847, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8834105134010315, "elements": 1024, "histogram": { "counts": [ 1, 2, 4, 5, 9, 10, 37, 84, 141, 188, 201, 134, 88, 51, 24, 10, 7, 2, 1, 1 ], "bin_edges": [ -0.11918215453624725, -0.10730426013469696, -0.09542636573314667, -0.08354847133159637, -0.07167057693004608, -0.05979267880320549, -0.0479147806763649, -0.036036886274814606, -0.024158991873264313, -0.01228109747171402, -0.0004032030701637268, 0.011474698781967163, 0.023352593183517456, 0.03523048758506775, 0.04710838198661804, 0.058986276388168335, 0.07086417078971863, 0.08274206519126892, 0.09461995959281921, 0.1064978539943695, 0.1183757483959198 ] } }, "transformer.layers.2.2.to_k.weight": { "min": -0.2805421054363251, "max": 0.2793859839439392, "mean": -7.715764513704926e-05, "std": 0.035099178552627563, "abs_mean": 0.027376187965273857, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.94107437133789, "elements": 1048576, "histogram": { "counts": [ 3, 2, 2, 7, 17, 20, 42, 64, 102, 133, 120, 140, 117, 88, 57, 35, 24, 17, 5, 5 ], "bin_edges": [ -0.12107668071985245, -0.11005239933729172, -0.09902812540531158, -0.08800384402275085, -0.07697956264019012, -0.06595528870820999, -0.05493100732564926, -0.04390673339366913, -0.0328824520111084, -0.02185817062854767, -0.010833896696567535, 0.00019038468599319458, 0.011214666068553925, 0.022238947451114655, 0.03326321393251419, 0.04428749531507492, 0.05531177669763565, 0.06633605808019638, 0.07736033946275711, 0.08838460594415665, 0.09940888732671738 ] } }, "transformer.layers.2.2.to_k.bias": { "min": -2.506035566329956, "max": 2.518012046813965, "mean": 0.026713747531175613, "std": 0.5862806439399719, "abs_mean": 0.39693859219551086, "sparsity": 0.0, "shape": [ 1024 ], "norm": 18.77129364013672, "elements": 1024, "histogram": { "counts": [ 1, 1, 2, 4, 10, 27, 28, 52, 128, 249, 256, 101, 57, 31, 19, 9, 7, 10, 4, 4 ], "bin_edges": [ -2.506035566329956, -2.254833221435547, -2.0036306381225586, -1.7524282932281494, -1.5012259483337402, -1.250023603439331, -0.9988211393356323, -0.7476186752319336, -0.4964163303375244, -0.24521398544311523, 0.005988359451293945, 0.2571909427642822, 0.5083932876586914, 0.7595956325531006, 1.0107982158660889, 1.262000560760498, 1.5132029056549072, 1.7644054889678955, 2.0156075954437256, 2.266810178756714, 2.518012046813965 ] } }, "transformer.layers.2.2.to_v.weight": { "min": -0.22091814875602722, "max": 0.27132153511047363, "mean": 2.8913418645970523e-06, "std": 0.0307327788323164, "abs_mean": 0.023796094581484795, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 31.469972610473633, "elements": 1048576, "histogram": { "counts": [ 3, 2, 6, 11, 31, 45, 87, 131, 146, 159, 142, 90, 67, 37, 21, 10, 5, 2, 3, 2 ], "bin_edges": [ -0.10520875453948975, -0.09382228553295135, -0.08243580907583237, -0.07104934006929398, -0.059662867337465286, -0.0482763946056366, -0.036889925599098206, -0.025503449141979218, -0.014116980135440826, -0.0027305111289024353, 0.008655965328216553, 0.020042434334754944, 0.031428903341293335, 0.042815372347831726, 0.05420185625553131, 0.0655883252620697, 0.0769747942686081, 0.08836126327514648, 0.09974773228168488, 0.11113421618938446, 0.12252067774534225 ] } }, "transformer.layers.2.2.to_v.bias": { "min": -0.03352135419845581, "max": 0.03120853193104267, "mean": 0.00011218251165701076, "std": 0.012406233698129654, "abs_mean": 0.010367151349782944, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.3968217968940735, "elements": 1024, "histogram": { "counts": [ 1, 2, 11, 27, 43, 52, 82, 86, 82, 81, 79, 91, 87, 74, 69, 56, 34, 27, 11, 5 ], "bin_edges": [ -0.03352135419845581, -0.030284859240055084, -0.027048366144299507, -0.02381187118589878, -0.020575378090143204, -0.017338883131742477, -0.014102388173341751, -0.010865895077586174, -0.007629400119185448, -0.004392905160784721, -0.0011564120650291443, 0.002080082893371582, 0.005316577851772308, 0.008553072810173035, 0.011789564043283463, 0.015026059001684189, 0.018262553960084915, 0.02149904891848564, 0.024735543876886368, 0.027972035109996796, 0.03120853193104267 ] } }, "transformer.layers.2.2.to_out.0.weight": { "min": -0.2351619005203247, "max": 0.23147742450237274, "mean": 5.6937635235954076e-05, "std": 0.0256962887942791, "abs_mean": 0.019686853513121605, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 26.312692642211914, "elements": 1048576, "histogram": { "counts": [ 1, 0, 2, 0, 1, 4, 10, 21, 55, 142, 188, 196, 179, 107, 65, 16, 6, 4, 2, 1 ], "bin_edges": [ -0.14605475962162018, -0.1331682950258255, -0.12028183043003082, -0.10739536583423615, -0.09450890123844147, -0.08162243664264679, -0.06873597204685211, -0.055849507451057434, -0.042963042855262756, -0.03007657825946808, -0.0171901136636734, -0.004303649067878723, 0.008582815527915955, 0.021469280123710632, 0.03435574471950531, 0.04724220931529999, 0.060128673911094666, 0.07301513850688934, 0.08590160310268402, 0.0987880676984787, 0.11167454719543457 ] } }, "transformer.layers.2.2.to_out.0.bias": { "min": -0.1356453150510788, "max": 0.1271977722644806, "mean": -0.005494291428476572, "std": 0.0399438738822937, "abs_mean": 0.03177820146083832, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.2896206378936768, "elements": 1024, "histogram": { "counts": [ 1, 2, 7, 18, 27, 46, 68, 92, 105, 146, 131, 102, 117, 64, 33, 18, 8, 6, 4, 5 ], "bin_edges": [ -0.1356453150510788, -0.12250316143035889, -0.10936100780963898, -0.09621885418891907, -0.08307670056819916, -0.06993454694747925, -0.05679239332675934, -0.04365023970603943, -0.03050808608531952, -0.01736593246459961, -0.0042237788438797, 0.00891837477684021, 0.02206052839756012, 0.03520268201828003, 0.04834483563899994, 0.06148698925971985, 0.07462914288043976, 0.08777129650115967, 0.10091345012187958, 0.11405560374259949, 0.1271977722644806 ] } }, "transformer.layers.2.3.g": { "min": 0.3544028699398041, "max": 1.1697261333465576, "mean": 0.7103750109672546, "std": 0.10338432341814041, "abs_mean": 0.7103750109672546, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.97124481201172, "elements": 1024, "histogram": { "counts": [ 2, 5, 14, 33, 41, 38, 82, 136, 164, 153, 148, 123, 40, 10, 4, 2, 3, 1, 0, 1 ], "bin_edges": [ 0.3544028699398041, 0.3951690196990967, 0.43593519926071167, 0.4767013490200043, 0.5174674987792969, 0.5582336783409119, 0.5989998579025269, 0.6397659778594971, 0.6805321574211121, 0.721298336982727, 0.7620644569396973, 0.8028306365013123, 0.8435968160629272, 0.8843629360198975, 0.9251291751861572, 0.9658952951431274, 1.0066614151000977, 1.0474276542663574, 1.0881937742233276, 1.1289598941802979, 1.1697261333465576 ] } }, "transformer.layers.2.4.ff.0.0.weight": { "min": -0.6172477006912231, "max": 0.5542004108428955, "mean": 0.001160221640020609, "std": 0.046119727194309235, "abs_mean": 0.03523973003029823, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 94.47351837158203, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 3, 3, 7, 18, 37, 76, 106, 169, 199, 171, 119, 57, 19, 8, 1, 5, 1 ], "bin_edges": [ -0.2309630662202835, -0.21040646731853485, -0.1898498833179474, -0.16929328441619873, -0.14873668551445007, -0.12818008661270142, -0.10762349516153336, -0.0870669037103653, -0.06651030480861664, -0.04595370590686798, -0.025397107005119324, -0.00484052300453186, 0.015716075897216797, 0.03627265989780426, 0.05682925879955292, 0.07738585770130157, 0.09794245660305023, 0.11849905550479889, 0.13905565440654755, 0.1596122533082962, 0.18016882240772247 ] } }, "transformer.layers.2.4.ff.0.0.bias": { "min": -0.18825410306453705, "max": 0.024966172873973846, "mean": -0.03482227772474289, "std": 0.02857418917119503, "abs_mean": 0.03584485873579979, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.8827545642852783, "elements": 4096, "histogram": { "counts": [ 2, 1, 2, 0, 0, 2, 2, 8, 16, 19, 39, 62, 65, 100, 136, 167, 173, 124, 61, 21 ], "bin_edges": [ -0.18825410306453705, -0.17785300314426422, -0.1674518883228302, -0.15705078840255737, -0.14664968848228455, -0.13624857366085052, -0.1258474737405777, -0.11544636636972427, -0.10504525899887085, -0.09464415162801743, -0.084243044257164, -0.07384194433689117, -0.06344083696603775, -0.053039729595184326, -0.0426386296749115, -0.03223751485347748, -0.02183641493320465, -0.011435315012931824, -0.0010342001914978027, 0.009366899728775024, 0.019768016412854195 ] } }, "transformer.layers.2.4.ff.2.weight": { "min": -1.130850911140442, "max": 0.9707417488098145, "mean": 0.0003595067828428, "std": 0.042347487062215805, "abs_mean": 0.028345687314867973, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 86.71993255615234, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 2, 4, 6, 16, 193, 527, 203, 29, 6, 8, 2, 1, 0, 2 ], "bin_edges": [ -0.5004795789718628, -0.457177996635437, -0.41387641429901123, -0.37057486176490784, -0.32727327942848206, -0.2839716970920563, -0.24067014455795288, -0.1973685622215271, -0.15406697988510132, -0.11076539754867554, -0.06746381521224976, -0.024162262678146362, 0.01913928985595703, 0.06244087219238281, 0.1057424545288086, 0.14904403686523438, 0.19234561920166016, 0.23564720153808594, 0.2789487838745117, 0.3222503662109375, 0.3655519187450409 ] } }, "transformer.layers.2.4.ff.2.bias": { "min": -0.5971466898918152, "max": 0.06270916759967804, "mean": -0.004877141211181879, "std": 0.02859053947031498, "abs_mean": 0.0179099403321743, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9276728630065918, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 71, 449, 428, 48 ], "bin_edges": [ -0.5971466898918152, -0.5641539096832275, -0.5311611294746399, -0.49816831946372986, -0.4651755094528198, -0.4321827292442322, -0.39918994903564453, -0.3661971688270569, -0.33320435881614685, -0.3002115786075592, -0.26721876859664917, -0.23422598838806152, -0.20123320817947388, -0.16824039816856384, -0.1352476179599762, -0.10225480794906616, -0.06926202774047852, -0.03626924753189087, -0.0032764673233032227, 0.0297163724899292, 0.06270916759967804 ] } }, "transformer.layers.3.1.g": { "min": 0.3752330243587494, "max": 0.9386839866638184, "mean": 0.5923458337783813, "std": 0.06656130403280258, "abs_mean": 0.5923458337783813, "sparsity": 0.0, "shape": [ 1024 ], "norm": 19.07424545288086, "elements": 1024, "histogram": { "counts": [ 1, 2, 5, 21, 44, 127, 226, 222, 136, 74, 45, 30, 30, 12, 15, 4, 3, 2, 0, 1 ], "bin_edges": [ 0.3752330243587494, 0.4034055769443512, 0.431578129529953, 0.4597506523132324, 0.48792320489883423, 0.516095757484436, 0.5442683100700378, 0.5724408626556396, 0.6006134152412415, 0.6287859678268433, 0.6569584608078003, 0.6851310729980469, 0.7133035659790039, 0.7414761781692505, 0.7696486711502075, 0.7978212237358093, 0.8259937763214111, 0.8541663289070129, 0.8823388814926147, 0.9105113744735718, 0.9386839866638184 ] } }, "transformer.layers.3.2.to_q.weight": { "min": -0.3911682367324829, "max": 0.3688437342643738, "mean": 7.119165093172342e-05, "std": 0.037188753485679626, "abs_mean": 0.02899729274213314, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.08082580566406, "elements": 1048576, "histogram": { "counts": [ 1, 1, 9, 10, 18, 47, 66, 127, 164, 197, 152, 97, 51, 32, 19, 2, 2, 3, 1, 1 ], "bin_edges": [ -0.138386532664299, -0.12326744198799133, -0.10814835131168365, -0.09302926063537598, -0.0779101699590683, -0.06279107928276062, -0.04767198860645294, -0.032552897930145264, -0.017433807253837585, -0.0023147165775299072, 0.012804374098777771, 0.02792346477508545, 0.04304255545139313, 0.058161646127700806, 0.07328073680400848, 0.08839982748031616, 0.10351891815662384, 0.11863799393177032, 0.1337570995092392, 0.14887620508670807, 0.16399529576301575 ] } }, "transformer.layers.3.2.to_q.bias": { "min": -0.11875540018081665, "max": 0.13628698885440826, "mean": 0.0009287752327509224, "std": 0.029227793216705322, "abs_mean": 0.021544134244322777, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9353049397468567, "elements": 1024, "histogram": { "counts": [ 2, 4, 1, 5, 9, 31, 61, 130, 190, 210, 153, 93, 62, 19, 10, 9, 5, 4, 1, 1 ], "bin_edges": [ -0.11875540018081665, -0.1060032844543457, -0.09325116127729416, -0.08049904555082321, -0.06774692237377167, -0.05499480664730072, -0.04224269092082977, -0.02949056774377823, -0.01673845201730728, -0.003986336290836334, 0.00876578688621521, 0.021517902612686157, 0.034270018339157104, 0.04702213406562805, 0.05977426469326019, 0.07252638041973114, 0.08527849614620209, 0.09803061187267303, 0.11078272759914398, 0.12353485822677612, 0.13628698885440826 ] } }, "transformer.layers.3.2.to_k.weight": { "min": -0.6185974478721619, "max": 0.5083587169647217, "mean": 1.5249222997226752e-05, "std": 0.036442261189222336, "abs_mean": 0.02839103899896145, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 37.31633758544922, "elements": 1048576, "histogram": { "counts": [ 2, 5, 12, 31, 69, 120, 183, 181, 183, 111, 60, 23, 15, 1, 3, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.12961581349372864, -0.11215391755104065, -0.09469202160835266, -0.07723013311624527, -0.05976823717355728, -0.04230634123086929, -0.024844452738761902, -0.007382556796073914, 0.010079339146614075, 0.027541235089302063, 0.04500313103199005, 0.06246502697467804, 0.07992690801620483, 0.09738880395889282, 0.11485069990158081, 0.1323125958442688, 0.1497744917869568, 0.16723638772964478, 0.18469828367233276, 0.20216017961502075, 0.21962207555770874 ] } }, "transformer.layers.3.2.to_k.bias": { "min": -8.17552661895752, "max": 8.776671409606934, "mean": -0.1091664582490921, "std": 1.6969325542449951, "abs_mean": 0.9193365573883057, "sparsity": 0.0, "shape": [ 1024 ], "norm": 54.38762283325195, "elements": 1024, "histogram": { "counts": [ 7, 3, 8, 10, 6, 8, 21, 40, 111, 518, 172, 46, 15, 4, 11, 13, 2, 2, 2, 1 ], "bin_edges": [ -8.17552661895752, -7.327916622161865, -6.480306625366211, -5.632697105407715, -4.7850871086120605, -3.9374771118164062, -3.08986759185791, -2.242257595062256, -1.3946475982666016, -0.5470376014709473, 0.30057239532470703, 1.1481819152832031, 1.9957914352416992, 2.8434019088745117, 3.691011428833008, 4.53862190246582, 5.386231422424316, 6.2338409423828125, 7.081451416015625, 7.929060935974121, 8.776671409606934 ] } }, "transformer.layers.3.2.to_v.weight": { "min": -0.27638494968414307, "max": 0.23973813652992249, "mean": 5.3197330998955294e-05, "std": 0.03261549770832062, "abs_mean": 0.025414835661649704, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.397850036621094, "elements": 1048576, "histogram": { "counts": [ 1, 2, 2, 13, 12, 38, 70, 93, 110, 158, 149, 123, 97, 51, 44, 19, 12, 3, 1, 2 ], "bin_edges": [ -0.11360769718885422, -0.10233889520168304, -0.09107010066509247, -0.0798012986779213, -0.06853249669075012, -0.05726369470357895, -0.04599490016698837, -0.0347260981798172, -0.023457296192646027, -0.012188494205474854, -0.0009196922183036804, 0.010349102318286896, 0.021617896854877472, 0.03288670629262924, 0.04415550082921982, 0.05542431026697159, 0.06669310480356216, 0.07796189934015274, 0.08923070877790451, 0.10049950331449509, 0.11176831275224686 ] } }, "transformer.layers.3.2.to_v.bias": { "min": -0.051992662250995636, "max": 0.03946495056152344, "mean": 9.150505502475426e-05, "std": 0.012954742647707462, "abs_mean": 0.010758287273347378, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4143596589565277, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 2, 2, 15, 34, 77, 89, 114, 120, 105, 128, 106, 105, 61, 27, 9, 4, 1 ], "bin_edges": [ -0.051992662250995636, -0.04741978272795677, -0.04284690320491791, -0.038274019956588745, -0.03370114043354988, -0.029128260910511017, -0.024555379524827003, -0.01998249813914299, -0.015409618616104126, -0.010836739093065262, -0.006263859570026398, -0.001690976321697235, 0.002881903201341629, 0.007454782724380493, 0.012027665972709656, 0.01660054177045822, 0.021173425018787384, 0.025746308267116547, 0.030319184064865112, 0.034892067313194275, 0.03946495056152344 ] } }, "transformer.layers.3.2.to_out.0.weight": { "min": -0.23067787289619446, "max": 0.23443163931369781, "mean": -2.1657757315551862e-05, "std": 0.029391853138804436, "abs_mean": 0.022796550765633583, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 30.09688377380371, "elements": 1048576, "histogram": { "counts": [ 2, 2, 4, 11, 35, 54, 100, 147, 188, 141, 157, 84, 41, 18, 9, 3, 1, 2, 0, 1 ], "bin_edges": [ -0.10899056494235992, -0.09656518697738647, -0.08413980901241302, -0.07171443849802017, -0.05928906053304672, -0.04686368256807327, -0.03443831205368042, -0.02201293408870697, -0.00958755612373352, 0.002837821841239929, 0.015263199806213379, 0.02768857777118683, 0.040113940834999084, 0.052539318799972534, 0.06496469676494598, 0.07739007472991943, 0.08981545269489288, 0.10224083065986633, 0.11466620862483978, 0.12709158658981323, 0.1395169496536255 ] } }, "transformer.layers.3.2.to_out.0.bias": { "min": -0.20401200652122498, "max": 0.10544212907552719, "mean": -0.004023304674774408, "std": 0.0326065756380558, "abs_mean": 0.02599199116230011, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.050817608833313, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 1, 7, 15, 58, 89, 166, 170, 191, 149, 87, 44, 17, 3, 2 ], "bin_edges": [ -0.20401200652122498, -0.1885392963886261, -0.17306658625602722, -0.15759387612342834, -0.14212118089199066, -0.12664847075939178, -0.11117576062679291, -0.09570305794477463, -0.08023034781217575, -0.06475764513015747, -0.049284934997558594, -0.03381222486495972, -0.01833951473236084, -0.002866804599761963, 0.01260589063167572, 0.028078600764274597, 0.043551310896873474, 0.05902400612831116, 0.07449671626091003, 0.08996942639350891, 0.10544212907552719 ] } }, "transformer.layers.3.3.g": { "min": 0.33983615040779114, "max": 1.0106816291809082, "mean": 0.7006407380104065, "std": 0.09645594656467438, "abs_mean": 0.7006407380104065, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.63176155090332, "elements": 1024, "histogram": { "counts": [ 1, 4, 9, 15, 17, 31, 30, 59, 73, 110, 135, 120, 169, 133, 72, 16, 2, 0, 1, 3 ], "bin_edges": [ 0.33983615040779114, 0.3733784258365631, 0.4069207012653351, 0.44046297669410706, 0.47400525212287903, 0.5075474977493286, 0.5410897731781006, 0.5746320486068726, 0.6081743240356445, 0.6417165994644165, 0.6752588748931885, 0.7088011503219604, 0.7423434257507324, 0.7758857011795044, 0.8094279766082764, 0.8429702520370483, 0.8765125274658203, 0.9100548028945923, 0.9435970783233643, 0.9771393537521362, 1.0106816291809082 ] } }, "transformer.layers.3.4.ff.0.0.weight": { "min": -0.5642791390419006, "max": 0.832179069519043, "mean": 0.00041513508767820895, "std": 0.042302437126636505, "abs_mean": 0.03297626972198486, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 86.63153839111328, "elements": 4194304, "histogram": { "counts": [ 5, 4, 13, 24, 42, 64, 116, 144, 143, 136, 109, 80, 53, 30, 16, 7, 5, 6, 1, 2 ], "bin_edges": [ -0.12684103846549988, -0.11227414011955261, -0.09770724177360535, -0.08314034342765808, -0.06857344508171082, -0.05400654673576355, -0.039439648389816284, -0.02487275004386902, -0.010305851697921753, 0.004261046648025513, 0.01882794499397278, 0.033394843339920044, 0.04796174168586731, 0.06252864003181458, 0.07709553837776184, 0.0916624367237091, 0.10622933506965637, 0.12079623341560364, 0.1353631317615509, 0.14993003010749817, 0.16449694335460663 ] } }, "transformer.layers.3.4.ff.0.0.bias": { "min": -0.21134838461875916, "max": 0.030589817091822624, "mean": -0.032172758132219315, "std": 0.026476319879293442, "abs_mean": 0.03335808217525482, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.6665127277374268, "elements": 4096, "histogram": { "counts": [ 1, 1, 3, 2, 2, 4, 8, 15, 29, 34, 42, 92, 120, 123, 186, 155, 94, 63, 21, 5 ], "bin_edges": [ -0.1719832420349121, -0.1618545949459076, -0.15172593295574188, -0.14159728586673737, -0.13146862387657166, -0.12133997678756714, -0.11121132969856262, -0.10108267515897751, -0.0909540206193924, -0.08082536607980728, -0.07069671154022217, -0.06056806445121765, -0.05043940991163254, -0.040310755372047424, -0.030182108283042908, -0.020053446292877197, -0.00992479920387268, 0.00020384788513183594, 0.010332509875297546, 0.020461156964302063, 0.030589817091822624 ] } }, "transformer.layers.3.4.ff.2.weight": { "min": -0.7536408305168152, "max": 0.717832088470459, "mean": -9.409409358340781e-06, "std": 0.03684220835566521, "abs_mean": 0.027992695569992065, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 75.44456481933594, "elements": 4194304, "histogram": { "counts": [ 1, 0, 3, 1, 5, 24, 49, 104, 159, 198, 187, 124, 76, 37, 22, 6, 2, 1, 0, 1 ], "bin_edges": [ -0.15810149908065796, -0.14198999106884003, -0.1258784681558609, -0.10976696014404297, -0.09365544468164444, -0.07754392921924591, -0.06143242120742798, -0.04532090574502945, -0.02920939028263092, -0.013097882270812988, 0.0030136406421661377, 0.01912514865398407, 0.035236656665802, 0.05134817957878113, 0.06745968759059906, 0.08357121050357819, 0.09968271851539612, 0.11579424142837524, 0.13190573453903198, 0.1480172574520111, 0.16412878036499023 ] } }, "transformer.layers.3.4.ff.2.bias": { "min": -0.2631220519542694, "max": 0.10570736974477768, "mean": -0.003029324347153306, "std": 0.028848078101873398, "abs_mean": 0.02227037213742733, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9277658462524414, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 59, 144, 244, 248, 180, 72, 20, 8, 1 ], "bin_edges": [ -0.2631220519542694, -0.24468058347702026, -0.22623911499977112, -0.20779763162136078, -0.18935616314411163, -0.1709146946668625, -0.15247321128845215, -0.134031742811203, -0.11559027433395386, -0.09714880585670471, -0.07870733737945557, -0.06026585400104523, -0.04182438552379608, -0.023382917046546936, -0.004941433668136597, 0.013500034809112549, 0.031941503286361694, 0.05038297176361084, 0.06882444024085999, 0.08726590871810913, 0.10570736974477768 ] } }, "transformer.layers.4.1.g": { "min": 0.28446710109710693, "max": 0.6937389373779297, "mean": 0.49939653277397156, "std": 0.04629269987344742, "abs_mean": 0.49939653277397156, "sparsity": 0.0, "shape": [ 1024 ], "norm": 16.049135208129883, "elements": 1024, "histogram": { "counts": [ 2, 0, 4, 5, 8, 7, 33, 57, 94, 183, 193, 175, 128, 56, 36, 13, 3, 1, 0, 2 ], "bin_edges": [ 0.28446710109710693, 0.3049306869506836, 0.32539427280426025, 0.3458578586578369, 0.36632147431373596, 0.3867850601673126, 0.4072486460208893, 0.42771226167678833, 0.448175847530365, 0.46863943338394165, 0.4891030192375183, 0.509566605091095, 0.5300301909446716, 0.5504937767982483, 0.5709574222564697, 0.5914210081100464, 0.611884593963623, 0.6323481798171997, 0.6528117656707764, 0.673275351524353, 0.6937389373779297 ] } }, "transformer.layers.4.2.to_q.weight": { "min": -0.27887189388275146, "max": 0.23408503830432892, "mean": -0.00011133109364891425, "std": 0.03876320272684097, "abs_mean": 0.030566837638616562, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.69315719604492, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 0, 0, 2, 4, 8, 27, 51, 107, 157, 166, 166, 137, 95, 47, 18, 8, 6 ], "bin_edges": [ -0.22455070912837982, -0.2071923017501831, -0.1898338794708252, -0.17247545719146729, -0.15511704981327057, -0.13775864243507385, -0.12040022015571594, -0.10304180532693863, -0.08568339049816132, -0.0683249682188034, -0.05096656084060669, -0.03360815346240997, -0.016249731183052063, 0.0011086910963058472, 0.018467098474502563, 0.03582550585269928, 0.05318392813205719, 0.0705423504114151, 0.08790077269077301, 0.10525916516780853, 0.12261758744716644 ] } }, "transformer.layers.4.2.to_q.bias": { "min": -0.15426576137542725, "max": 0.1266399770975113, "mean": -0.0022300498094409704, "std": 0.0333842970430851, "abs_mean": 0.024381492286920547, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.070157766342163, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 7, 7, 18, 21, 40, 73, 151, 224, 190, 113, 81, 37, 13, 10, 5, 5, 2 ], "bin_edges": [ -0.15426576137542725, -0.14022047817707062, -0.126175194978714, -0.11212990432977676, -0.09808461368083954, -0.08403933048248291, -0.06999404728412628, -0.055948756635189056, -0.04190347343683243, -0.027858182787895203, -0.013812899589538574, 0.0002323836088180542, 0.014277666807174683, 0.02832295000553131, 0.04236824810504913, 0.05641353130340576, 0.07045881450176239, 0.08450409770011902, 0.09854939579963684, 0.11259466409683228, 0.1266399770975113 ] } }, "transformer.layers.4.2.to_k.weight": { "min": -0.41348376870155334, "max": 0.6593844294548035, "mean": -1.978595719265286e-05, "std": 0.039100244641304016, "abs_mean": 0.030760983005166054, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.038177490234375, "elements": 1048576, "histogram": { "counts": [ 1, 1, 2, 3, 6, 21, 30, 55, 70, 122, 150, 140, 132, 110, 73, 39, 22, 11, 8, 4 ], "bin_edges": [ -0.14758364856243134, -0.1345057636499405, -0.12142786383628845, -0.10834997892379761, -0.09527208656072617, -0.08219419419765472, -0.06911630928516388, -0.05603841692209244, -0.042960524559020996, -0.029882632195949554, -0.016804739832878113, -0.003726854920387268, 0.009351029992103577, 0.022428929805755615, 0.03550681471824646, 0.0485847145318985, 0.06166259944438934, 0.07474048435688019, 0.08781838417053223, 0.10089626908302307, 0.11397416889667511 ] } }, "transformer.layers.4.2.to_k.bias": { "min": -4.232041358947754, "max": 4.715827465057373, "mean": -0.020488303154706955, "std": 1.0068391561508179, "abs_mean": 0.6846990585327148, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.20978927612305, "elements": 1024, "histogram": { "counts": [ 2, 5, 4, 7, 14, 20, 55, 88, 179, 306, 165, 63, 35, 23, 16, 5, 8, 2, 1, 2 ], "bin_edges": [ -4.232041358947754, -3.7846479415893555, -3.337254524230957, -2.8898611068725586, -2.44246768951416, -1.9950742721557617, -1.5476808547973633, -1.1002874374389648, -0.6528940200805664, -0.20550060272216797, 0.24189281463623047, 0.6892862319946289, 1.1366796493530273, 1.5840730667114258, 2.031466484069824, 2.4788599014282227, 2.926253318786621, 3.3736467361450195, 3.821040153503418, 4.268433570861816, 4.715827465057373 ] } }, "transformer.layers.4.2.to_v.weight": { "min": -0.24481239914894104, "max": 0.2074868232011795, "mean": 4.380439349915832e-05, "std": 0.03396626561880112, "abs_mean": 0.026826273649930954, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 34.780982971191406, "elements": 1048576, "histogram": { "counts": [ 1, 1, 12, 17, 44, 59, 117, 165, 159, 138, 130, 76, 33, 17, 18, 10, 1, 1, 0, 1 ], "bin_edges": [ -0.11701656132936478, -0.10330986231565475, -0.08960317075252533, -0.07589647173881531, -0.062189772725105286, -0.048483073711395264, -0.03477638214826584, -0.021069683134555817, -0.007362984120845795, 0.006343714892864227, 0.02005041390657425, 0.03375711292028427, 0.0474637970328331, 0.06117049604654312, 0.07487719506025314, 0.08858389407396317, 0.10229059308767319, 0.11599729210138321, 0.12970399856567383, 0.14341068267822266, 0.15711738169193268 ] } }, "transformer.layers.4.2.to_v.bias": { "min": -0.03449943661689758, "max": 0.044728994369506836, "mean": -1.8020247807726264e-05, "std": 0.012624197639524937, "abs_mean": 0.010656064376235008, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.40377742052078247, "elements": 1024, "histogram": { "counts": [ 2, 3, 11, 48, 74, 99, 104, 94, 97, 102, 97, 86, 75, 70, 27, 7, 2, 0, 1, 1 ], "bin_edges": [ -0.03449943661689758, -0.030538015067577362, -0.02657659351825714, -0.02261517196893692, -0.0186537504196167, -0.014692328870296478, -0.010730907320976257, -0.006769485771656036, -0.0028080642223358154, 0.0011533573269844055, 0.0051147788763046265, 0.009076200425624847, 0.013037621974945068, 0.01699904352426529, 0.02096046507358551, 0.02492188662290573, 0.028883308172225952, 0.03284472972154617, 0.036806151270866394, 0.040767572820186615, 0.044728994369506836 ] } }, "transformer.layers.4.2.to_out.0.weight": { "min": -0.20050014555454254, "max": 0.20566238462924957, "mean": -2.9678063583560288e-05, "std": 0.03102380409836769, "abs_mean": 0.024439673870801926, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 31.76807975769043, "elements": 1048576, "histogram": { "counts": [ 5, 13, 17, 49, 81, 119, 125, 139, 161, 113, 76, 46, 30, 8, 9, 3, 4, 1, 0, 1 ], "bin_edges": [ -0.08439754694700241, -0.07326537370681763, -0.06213320419192314, -0.051001034677028656, -0.03986886143684387, -0.028736688196659088, -0.0176045224070549, -0.006472349166870117, 0.004659824073314667, 0.01579199731349945, 0.026924170553684235, 0.03805633634328842, 0.04918850213289261, 0.06032068282365799, 0.07145284861326218, 0.08258502930402756, 0.09371719509363174, 0.10484936088323593, 0.11598154157400131, 0.1271136999130249, 0.13824589550495148 ] } }, "transformer.layers.4.2.to_out.0.bias": { "min": -0.19964830577373505, "max": 0.11326169967651367, "mean": -0.00291792256757617, "std": 0.03448895364999771, "abs_mean": 0.02703409641981125, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.107052206993103, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 1, 4, 16, 25, 64, 119, 165, 188, 162, 123, 73, 27, 27, 3, 2 ], "bin_edges": [ -0.19964830577373505, -0.1840028017759323, -0.16835731267929077, -0.15271180868148804, -0.1370663046836853, -0.12142080068588257, -0.10577530413866043, -0.0901298075914383, -0.07448430359363556, -0.058838799595832825, -0.04319329559803009, -0.02754780650138855, -0.011902302503585815, 0.003743201494216919, 0.01938869059085846, 0.035034194588661194, 0.05067969858646393, 0.06632520258426666, 0.0819707065820694, 0.09761621057987213, 0.11326169967651367 ] } }, "transformer.layers.4.3.g": { "min": 0.36708179116249084, "max": 1.0548574924468994, "mean": 0.6704699397087097, "std": 0.06616173684597015, "abs_mean": 0.6704699397087097, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.559146881103516, "elements": 1024, "histogram": { "counts": [ 2, 8, 5, 8, 16, 41, 68, 101, 219, 272, 189, 59, 8, 1, 0, 1, 0, 1, 0, 1 ], "bin_edges": [ 0.36708179116249084, 0.4014705717563629, 0.4358593821525574, 0.47024816274642944, 0.5046369433403015, 0.5390257239341736, 0.5734145045280457, 0.6078033447265625, 0.6421921253204346, 0.6765809059143066, 0.7109696865081787, 0.7453584671020508, 0.7797472476959229, 0.8141360282897949, 0.848524808883667, 0.8829135894775391, 0.9173023700714111, 0.9516911506652832, 0.9860799312591553, 1.0204687118530273, 1.0548574924468994 ] } }, "transformer.layers.4.4.ff.0.0.weight": { "min": -0.397816002368927, "max": 0.5021188855171204, "mean": -3.856579860439524e-05, "std": 0.041137274354696274, "abs_mean": 0.03244972229003906, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 84.24102783203125, "elements": 4194304, "histogram": { "counts": [ 3, 3, 14, 16, 38, 63, 98, 132, 140, 160, 116, 89, 69, 29, 17, 5, 5, 1, 0, 2 ], "bin_edges": [ -0.13683238625526428, -0.12173408269882202, -0.10663577914237976, -0.0915374681353569, -0.07643916457891464, -0.06134086102247238, -0.046242550015449524, -0.031144246459007263, -0.016045942902565002, -0.0009476393461227417, 0.014150664210319519, 0.02924896776676178, 0.044347286224365234, 0.059445589780807495, 0.07454389333724976, 0.08964219689369202, 0.10474050045013428, 0.11983880400657654, 0.1349371075630188, 0.15003541111946106, 0.16513371467590332 ] } }, "transformer.layers.4.4.ff.0.0.bias": { "min": -0.12784262001514435, "max": 0.02675941213965416, "mean": -0.030531462281942368, "std": 0.02184327319264412, "abs_mean": 0.03135973587632179, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.402501106262207, "elements": 4096, "histogram": { "counts": [ 1, 1, 3, 7, 7, 9, 17, 21, 46, 66, 82, 111, 106, 135, 128, 96, 85, 47, 21, 11 ], "bin_edges": [ -0.11856583505868912, -0.11168412119150162, -0.10480239987373352, -0.09792068600654602, -0.09103897213935852, -0.08415725827217102, -0.07727553695440292, -0.07039382308721542, -0.06351210176944733, -0.05663038790225983, -0.04974867403507233, -0.04286696016788483, -0.03598523885011673, -0.02910352498292923, -0.02222181111574173, -0.015340089797973633, -0.008458375930786133, -0.0015766620635986328, 0.005305059254169464, 0.012186773121356964, 0.019068485125899315 ] } }, "transformer.layers.4.4.ff.2.weight": { "min": -0.4485797882080078, "max": 0.43235480785369873, "mean": 8.378911297768354e-05, "std": 0.034896139055490494, "abs_mean": 0.0270802304148674, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 71.46002197265625, "elements": 4194304, "histogram": { "counts": [ 2, 4, 3, 15, 26, 50, 70, 104, 124, 153, 140, 116, 64, 65, 31, 14, 13, 3, 2, 1 ], "bin_edges": [ -0.11277943849563599, -0.10100102424621582, -0.08922261744737625, -0.07744420319795609, -0.06566579639911652, -0.05388738214969635, -0.042108967900276184, -0.030330561101436615, -0.01855214685201645, -0.006773732602596283, 0.005004674196243286, 0.016783088445663452, 0.028561502695083618, 0.040339916944503784, 0.052118316292762756, 0.06389673054218292, 0.07567514479160309, 0.08745355904102325, 0.09923197329044342, 0.11101037263870239, 0.12278879433870316 ] } }, "transformer.layers.4.4.ff.2.bias": { "min": -0.26721277832984924, "max": 0.07248232513666153, "mean": -0.0011095060035586357, "std": 0.023109637200832367, "abs_mean": 0.01726192981004715, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.7399994730949402, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 20, 78, 179, 306, 263, 116, 32, 1 ], "bin_edges": [ -0.26721277832984924, -0.25022801756858826, -0.23324327170848846, -0.21625851094722748, -0.19927376508712769, -0.1822890043258667, -0.1653042435646057, -0.14831948280334473, -0.13133473694324493, -0.11434997618198395, -0.09736523032188416, -0.08038046956062317, -0.06339570879936218, -0.04641096293926239, -0.029426202178001404, -0.012441456317901611, 0.004543304443359375, 0.02152806520462036, 0.03851282596588135, 0.055497556924819946, 0.07248232513666153 ] } }, "transformer.layers.5.1.g": { "min": 0.287344753742218, "max": 0.6839542388916016, "mean": 0.5244242548942566, "std": 0.047291453927755356, "abs_mean": 0.5244242548942566, "sparsity": 0.0, "shape": [ 1024 ], "norm": 16.849607467651367, "elements": 1024, "histogram": { "counts": [ 2, 0, 2, 2, 5, 6, 12, 17, 34, 94, 152, 158, 183, 152, 101, 53, 16, 3, 5, 3 ], "bin_edges": [ 0.287344753742218, 0.3071752190589905, 0.3270057141780853, 0.3468361794948578, 0.36666664481163025, 0.3864971399307251, 0.40632760524749756, 0.42615807056427, 0.4459885358810425, 0.46581903100013733, 0.4856494963169098, 0.5054799914360046, 0.5253104567527771, 0.5451409220695496, 0.564971387386322, 0.5848019123077393, 0.6046323776245117, 0.6244628429412842, 0.6442933082580566, 0.6641237735748291, 0.6839542388916016 ] } }, "transformer.layers.5.2.to_q.weight": { "min": -0.22201856970787048, "max": 0.22311273217201233, "mean": 1.577789407747332e-05, "std": 0.038952890783548355, "abs_mean": 0.030697684735059738, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.88728713989258, "elements": 1048576, "histogram": { "counts": [ 2, 6, 15, 28, 34, 68, 97, 121, 126, 155, 118, 90, 67, 39, 19, 6, 4, 1, 0, 4 ], "bin_edges": [ -0.12169166654348373, -0.10821932554244995, -0.09474697709083557, -0.08127463608980179, -0.067802295088768, -0.054329946637153625, -0.04085760563611984, -0.027385257184505463, -0.01391291618347168, -0.00044057518243789673, 0.013031773269176483, 0.026504121720790863, 0.03997645527124405, 0.05344880372285843, 0.06692115217447281, 0.080393485724926, 0.09386583417654037, 0.10733818262815475, 0.12081051617860794, 0.13428285717964172, 0.1477552056312561 ] } }, "transformer.layers.5.2.to_q.bias": { "min": -0.13627174496650696, "max": 0.1090594157576561, "mean": 0.00023713918926659971, "std": 0.029215561226010323, "abs_mean": 0.020363088697195053, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9344722032546997, "elements": 1024, "histogram": { "counts": [ 1, 1, 2, 10, 8, 4, 13, 30, 50, 124, 221, 230, 156, 75, 31, 13, 8, 8, 10, 5 ], "bin_edges": [ -0.13627174496650696, -0.12400518357753754, -0.11173862963914871, -0.09947206825017929, -0.08720551431179047, -0.07493895292282104, -0.06267239153385162, -0.0504058375954628, -0.03813927620649338, -0.025872714817523956, -0.013606160879135132, -0.0013395994901657104, 0.010926961898803711, 0.023193523287773132, 0.03546006977558136, 0.04772663116455078, 0.0599931925535202, 0.07225975394248962, 0.08452631533145905, 0.09679286181926727, 0.1090594157576561 ] } }, "transformer.layers.5.2.to_k.weight": { "min": -0.3747805953025818, "max": 0.43678468465805054, "mean": -9.573410352459177e-06, "std": 0.03928905352950096, "abs_mean": 0.030855529010295868, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.23149871826172, "elements": 1048576, "histogram": { "counts": [ 1, 2, 3, 6, 15, 20, 41, 58, 97, 130, 151, 138, 107, 94, 60, 43, 14, 12, 3, 5 ], "bin_edges": [ -0.1474566012620926, -0.133971706032753, -0.1204868033528328, -0.1070019081234932, -0.093517005443573, -0.0800321102142334, -0.0665472149848938, -0.0530623123049736, -0.039577417075634, -0.026092521846294403, -0.012607619166374207, 0.0008772760629653931, 0.014362171292304993, 0.027847066521644592, 0.041331976652145386, 0.054816871881484985, 0.06830176711082458, 0.08178666234016418, 0.09527155756950378, 0.10875646770000458, 0.12224137037992477 ] } }, "transformer.layers.5.2.to_k.bias": { "min": -3.840266227722168, "max": 4.992228984832764, "mean": 0.009751387871801853, "std": 0.8444771766662598, "abs_mean": 0.5379955172538757, "sparsity": 0.0, "shape": [ 1024 ], "norm": 27.01187515258789, "elements": 1024, "histogram": { "counts": [ 2, 3, 5, 11, 13, 33, 65, 97, 424, 180, 92, 33, 17, 8, 9, 6, 0, 0, 1, 1 ], "bin_edges": [ -3.840266227722168, -3.398641586303711, -2.957016706466675, -2.5153920650482178, -2.0737671852111816, -1.6321425437927246, -1.1905179023742676, -0.7488930225372314, -0.3072683811187744, 0.13435626029968262, 0.5759811401367188, 1.0176057815551758, 1.4592304229736328, 1.9008550643920898, 2.342480182647705, 2.784104824066162, 3.225729465484619, 3.667354106903076, 4.108978748321533, 4.550603866577148, 4.992228984832764 ] } }, "transformer.layers.5.2.to_v.weight": { "min": -0.22314536571502686, "max": 0.21986283361911774, "mean": -2.0974857761757448e-07, "std": 0.034413520246744156, "abs_mean": 0.027191974222660065, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.238975524902344, "elements": 1048576, "histogram": { "counts": [ 2, 0, 0, 8, 13, 28, 46, 96, 111, 135, 134, 130, 109, 73, 49, 26, 24, 10, 2, 4 ], "bin_edges": [ -0.12520456314086914, -0.11339918524026871, -0.10159379988908768, -0.08978842198848724, -0.07798303663730621, -0.06617765873670578, -0.05437228083610535, -0.042566895484924316, -0.030761517584323883, -0.01895613968372345, -0.007150754332542419, 0.004654631018638611, 0.016460001468658447, 0.028265386819839478, 0.04007077217102051, 0.051876142621040344, 0.06368152797222137, 0.0754869133234024, 0.08729228377342224, 0.09909766912460327, 0.1109030619263649 ] } }, "transformer.layers.5.2.to_v.bias": { "min": -0.043581560254096985, "max": 0.03578736633062363, "mean": -0.00025875651044771075, "std": 0.012076529674232006, "abs_mean": 0.01006263680756092, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.3863489329814911, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 1, 13, 35, 55, 77, 110, 126, 102, 104, 94, 104, 87, 47, 29, 11, 2, 2 ], "bin_edges": [ -0.043581560254096985, -0.039613112807273865, -0.035644665360450745, -0.03167622163891792, -0.027707774192094803, -0.023739326745271683, -0.019770881161093712, -0.01580243557691574, -0.01183398813009262, -0.0078655406832695, -0.0038970932364463806, 7.135048508644104e-05, 0.004039797931909561, 0.008008245378732681, 0.011976689100265503, 0.015945136547088623, 0.019913583993911743, 0.023882031440734863, 0.027850478887557983, 0.031818926334381104, 0.03578736633062363 ] } }, "transformer.layers.5.2.to_out.0.weight": { "min": -0.21286383271217346, "max": 0.18843913078308105, "mean": -1.6783855244284496e-05, "std": 0.03154028207063675, "abs_mean": 0.0248585008084774, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.29688262939453, "elements": 1048576, "histogram": { "counts": [ 1, 3, 7, 8, 30, 33, 60, 86, 96, 139, 134, 119, 113, 71, 33, 44, 7, 7, 7, 2 ], "bin_edges": [ -0.1055210754275322, -0.09519147127866745, -0.0848618745803833, -0.07453227043151855, -0.06420266628265381, -0.05387306213378906, -0.043543461710214615, -0.03321386128664017, -0.02288425713777542, -0.012554652988910675, -0.002225048840045929, 0.00810454785823822, 0.018434152007102966, 0.028763748705387115, 0.03909335285425186, 0.04942295700311661, 0.059752561151981354, 0.0700821653008461, 0.08041176944971085, 0.09074137359857559, 0.10107096284627914 ] } }, "transformer.layers.5.2.to_out.0.bias": { "min": -0.18049854040145874, "max": 0.12063688784837723, "mean": -0.0024107899516820908, "std": 0.04124762490391731, "abs_mean": 0.03318789601325989, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.321532964706421, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 3, 16, 14, 49, 73, 111, 118, 130, 152, 118, 89, 61, 34, 20, 8, 3 ], "bin_edges": [ -0.18049854040145874, -0.16544176638126373, -0.15038499236106873, -0.1353282332420349, -0.1202714592218399, -0.1052146852016449, -0.09015791118144989, -0.07510114461183548, -0.06004437059164047, -0.04498760402202606, -0.029930830001831055, -0.014874055981636047, 0.00018271803855895996, 0.015239492058753967, 0.03029625117778778, 0.04535302519798279, 0.060409799218177795, 0.0754665732383728, 0.09052333235740662, 0.10558012127876282, 0.12063688784837723 ] } }, "transformer.layers.5.3.g": { "min": 0.4223836064338684, "max": 0.9401367902755737, "mean": 0.6626168489456177, "std": 0.05654710531234741, "abs_mean": 0.6626168489456177, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.280733108520508, "elements": 1024, "histogram": { "counts": [ 4, 9, 2, 13, 14, 35, 48, 97, 157, 217, 232, 101, 56, 9, 2, 2, 0, 0, 1, 1 ], "bin_edges": [ 0.4223836064338684, 0.4482712745666504, 0.47415891289711, 0.5000466108322144, 0.5259342193603516, 0.5518218874931335, 0.5777095556259155, 0.6035972237586975, 0.6294848918914795, 0.6553725004196167, 0.6812602281570435, 0.7071478366851807, 0.7330355048179626, 0.7589231729507446, 0.7848108410835266, 0.8106985092163086, 0.8365861177444458, 0.8624738454818726, 0.8883614540100098, 0.9142491221427917, 0.9401367902755737 ] } }, "transformer.layers.5.4.ff.0.0.weight": { "min": -0.3711914122104645, "max": 0.4754900634288788, "mean": -8.231064566643909e-05, "std": 0.04089626669883728, "abs_mean": 0.03231760859489441, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.74752807617188, "elements": 4194304, "histogram": { "counts": [ 1, 1, 0, 8, 14, 28, 49, 75, 109, 152, 134, 135, 118, 79, 46, 32, 11, 2, 3, 3 ], "bin_edges": [ -0.15829983353614807, -0.14343194663524628, -0.12856405973434448, -0.11369617283344269, -0.0988282859325409, -0.0839603990316391, -0.0690925121307373, -0.05422462522983551, -0.039356738328933716, -0.02448885142803192, -0.009620964527130127, 0.0052469223737716675, 0.020114809274673462, 0.034982696175575256, 0.04985058307647705, 0.06471846997737885, 0.07958635687828064, 0.09445425868034363, 0.10932213068008423, 0.12419000267982483, 0.139057919383049 ] } }, "transformer.layers.5.4.ff.0.0.bias": { "min": -0.2078404426574707, "max": 0.02713177166879177, "mean": -0.030231105163693428, "std": 0.021318932995200157, "abs_mean": 0.03098458983004093, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.367399215698242, "elements": 4096, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 1, 1, 3, 13, 25, 44, 93, 194, 226, 215, 112, 58, 14 ], "bin_edges": [ -0.2078404426574707, -0.1963563710451126, -0.18487228453159332, -0.17338821291923523, -0.16190414130687714, -0.15042006969451904, -0.13893598318099976, -0.12745191156864166, -0.11596783250570297, -0.10448375344276428, -0.09299968183040619, -0.0815156102180481, -0.07003152370452881, -0.058547452092170715, -0.04706338047981262, -0.035579293966293335, -0.02409522235393524, -0.012611150741577148, -0.0011270642280578613, 0.010357007384300232, 0.021841073408722878 ] } }, "transformer.layers.5.4.ff.2.weight": { "min": -0.3397354185581207, "max": 0.7327741384506226, "mean": 8.48791969474405e-05, "std": 0.03477150574326515, "abs_mean": 0.027075331658124924, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 71.20501708984375, "elements": 4194304, "histogram": { "counts": [ 2, 4, 20, 33, 63, 94, 120, 147, 144, 139, 98, 56, 42, 21, 8, 2, 5, 1, 0, 1 ], "bin_edges": [ -0.10304070264101028, -0.09046163409948349, -0.07788257300853729, -0.0653035044670105, -0.052724439650774, -0.040145374834537506, -0.02756630629301071, -0.014987245202064514, -0.0024081766605377197, 0.010170891880989075, 0.022749952971935272, 0.03532902151346207, 0.04790809005498886, 0.060487158596515656, 0.07306621223688126, 0.08564528077840805, 0.09822434931993484, 0.11080341786146164, 0.12338248640298843, 0.13596153259277344, 0.14854060113430023 ] } }, "transformer.layers.5.4.ff.2.bias": { "min": -0.23985552787780762, "max": 0.050368692725896835, "mean": -0.0011948456522077322, "std": 0.02045026607811451, "abs_mean": 0.015394063666462898, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.655205488204956, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 23, 99, 217, 301, 233, 103, 18 ], "bin_edges": [ -0.23985552787780762, -0.22534431517124176, -0.2108331024646759, -0.19632190465927124, -0.1818106770515442, -0.16729947924613953, -0.15278826653957367, -0.1382770538330078, -0.12376584112644196, -0.1092546284198761, -0.09474341571331024, -0.08023220300674438, -0.06572100520133972, -0.051209792494773865, -0.03669857978820801, -0.02218736708164215, -0.007676154375076294, 0.006835058331489563, 0.02134627103805542, 0.03585746884346008, 0.050368692725896835 ] } }, "transformer.layers.6.1.g": { "min": 0.3060871660709381, "max": 0.6523372530937195, "mean": 0.5249941945075989, "std": 0.04590437561273575, "abs_mean": 0.5249941945075989, "sparsity": 0.0, "shape": [ 1024 ], "norm": 16.863849639892578, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 2, 6, 15, 12, 18, 31, 54, 100, 109, 158, 169, 140, 118, 38, 20, 5, 2 ], "bin_edges": [ 0.3060871660709381, 0.3233996629714966, 0.34071218967437744, 0.3580246865749359, 0.3753371834754944, 0.39264968037605286, 0.40996217727661133, 0.4272747039794922, 0.44458720088005066, 0.46189969778060913, 0.47921222448349, 0.49652472138404846, 0.5138372182846069, 0.5311497449874878, 0.5484622120857239, 0.5657747387886047, 0.5830872058868408, 0.6003997325897217, 0.6177122592926025, 0.6350247859954834, 0.6523372530937195 ] } }, "transformer.layers.6.2.to_q.weight": { "min": -0.30396750569343567, "max": 0.2171545922756195, "mean": 7.000747427809983e-05, "std": 0.03949857875704765, "abs_mean": 0.031311385333538055, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.446109771728516, "elements": 1048576, "histogram": { "counts": [ 4, 5, 10, 15, 33, 42, 67, 99, 94, 119, 120, 112, 103, 83, 35, 28, 14, 10, 3, 4 ], "bin_edges": [ -0.12200530618429184, -0.10982391238212585, -0.09764251112937927, -0.08546111732721329, -0.0732797235250473, -0.06109832227230072, -0.048916928470134735, -0.03673552721738815, -0.024554133415222168, -0.012372739613056183, -0.00019133836030960083, 0.011990062892436981, 0.02417144924402237, 0.03635285049676895, 0.04853425174951553, 0.06071563810110092, 0.0728970393538475, 0.08507844060659409, 0.09725982695817947, 0.10944122821092606, 0.12162262946367264 ] } }, "transformer.layers.6.2.to_q.bias": { "min": -0.14921154081821442, "max": 0.1312280148267746, "mean": 0.00034826344926841557, "std": 0.030445020645856857, "abs_mean": 0.020625557750463486, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9738286137580872, "elements": 1024, "histogram": { "counts": [ 2, 2, 1, 7, 8, 9, 12, 31, 64, 187, 255, 231, 125, 23, 15, 5, 6, 7, 5, 5 ], "bin_edges": [ -0.14921154081821442, -0.13518956303596497, -0.12116758525371552, -0.10714560747146606, -0.09312362968921661, -0.07910165190696716, -0.06507967412471771, -0.05105769634246826, -0.03703571856021881, -0.02301374077796936, -0.00899176299571991, 0.005030214786529541, 0.01905219256877899, 0.03307417035102844, 0.04709614813327789, 0.061118125915527344, 0.0751401036977768, 0.08916208148002625, 0.1031840592622757, 0.11720602214336395, 0.1312280148267746 ] } }, "transformer.layers.6.2.to_k.weight": { "min": -0.2569451630115509, "max": 0.20191657543182373, "mean": 3.105865835095756e-05, "std": 0.03948771581053734, "abs_mean": 0.03128843009471893, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.43494415283203, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 0, 3, 17, 24, 58, 109, 156, 169, 172, 122, 92, 39, 22, 12, 2, 0, 2 ], "bin_edges": [ -0.1837465465068817, -0.1667291522026062, -0.1497117578983307, -0.13269434869289398, -0.11567695438861847, -0.09865956008434296, -0.08164215832948685, -0.06462475657463074, -0.047607362270355225, -0.030589967966079712, -0.0135725736618042, 0.0034448355436325073, 0.02046222984790802, 0.03747962415218353, 0.05449703335762024, 0.07151442766189575, 0.08853182196617126, 0.10554921627044678, 0.12256661057472229, 0.1395840048789978, 0.15660138428211212 ] } }, "transformer.layers.6.2.to_k.bias": { "min": -2.332984685897827, "max": 2.372544527053833, "mean": -0.026222502812743187, "std": 0.44942858815193176, "abs_mean": 0.3095816969871521, "sparsity": 0.0, "shape": [ 1024 ], "norm": 14.399161338806152, "elements": 1024, "histogram": { "counts": [ 3, 2, 1, 5, 7, 12, 19, 72, 144, 274, 271, 108, 47, 17, 6, 3, 6, 0, 2, 1 ], "bin_edges": [ -2.332984685897827, -2.097708225250244, -1.8624317646026611, -1.6271553039550781, -1.3918788433074951, -1.156602382659912, -0.9213259220123291, -0.6860494613647461, -0.4507730007171631, -0.21549654006958008, 0.01977992057800293, 0.25505638122558594, 0.49033284187316895, 0.725609302520752, 0.960885763168335, 1.196162223815918, 1.431438684463501, 1.666715145111084, 1.901991605758667, 2.137268304824829, 2.372544527053833 ] } }, "transformer.layers.6.2.to_v.weight": { "min": -0.1888340413570404, "max": 0.21024198830127716, "mean": 3.7197845813352615e-05, "std": 0.03479824960231781, "abs_mean": 0.02761632390320301, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.63287353515625, "elements": 1048576, "histogram": { "counts": [ 2, 0, 3, 12, 19, 45, 56, 85, 120, 134, 144, 115, 113, 52, 61, 21, 11, 2, 2, 3 ], "bin_edges": [ -0.12260862439870834, -0.11051894724369049, -0.09842926263809204, -0.08633958548307419, -0.07424990832805634, -0.062160223722457886, -0.05007054656744003, -0.03798086196184158, -0.02589118480682373, -0.013801507651805878, -0.001711823046207428, 0.010377861559391022, 0.022467531263828278, 0.03455721586942673, 0.04664690047502518, 0.05873657017946243, 0.07082625478506088, 0.08291593939065933, 0.09500560909509659, 0.10709529370069504, 0.11918498575687408 ] } }, "transformer.layers.6.2.to_v.bias": { "min": -0.031675707548856735, "max": 0.035443130880594254, "mean": -0.00020022659737151116, "std": 0.012285580858588219, "abs_mean": 0.010282876901328564, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.3929988145828247, "elements": 1024, "histogram": { "counts": [ 4, 11, 19, 38, 47, 88, 91, 82, 97, 72, 91, 97, 89, 55, 58, 35, 16, 4, 4, 2 ], "bin_edges": [ -0.031675707548856735, -0.028319764882326126, -0.024963824078440666, -0.021607881411910057, -0.018251940608024597, -0.014895997941493988, -0.011540055274963379, -0.008184114471077919, -0.00482817180454731, -0.0014722291380167007, 0.0018837116658687592, 0.005239654332399368, 0.008595596998929977, 0.011951539665460587, 0.015307478606700897, 0.018663421273231506, 0.022019363939762115, 0.025375306606292725, 0.028731249272823334, 0.032087188214063644, 0.035443130880594254 ] } }, "transformer.layers.6.2.to_out.0.weight": { "min": -0.18818390369415283, "max": 0.17026524245738983, "mean": -6.799850234529004e-05, "std": 0.032174814492464066, "abs_mean": 0.02552211657166481, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.94668960571289, "elements": 1048576, "histogram": { "counts": [ 1, 0, 5, 4, 12, 13, 32, 56, 87, 113, 150, 151, 136, 94, 54, 59, 17, 9, 3, 4 ], "bin_edges": [ -0.12617486715316772, -0.1149633452296257, -0.10375183075666428, -0.09254030883312225, -0.08132879436016083, -0.0701172724366188, -0.05890575051307678, -0.047694236040115356, -0.036482714116573334, -0.02527119219303131, -0.014059677720069885, -0.0028481557965278625, 0.00836336612701416, 0.019574880599975586, 0.03078639507293701, 0.04199792444705963, 0.05320943892002106, 0.06442095339298248, 0.0756324827671051, 0.08684399724006653, 0.09805550426244736 ] } }, "transformer.layers.6.2.to_out.0.bias": { "min": -0.13918116688728333, "max": 0.13709498941898346, "mean": -0.0025172303430736065, "std": 0.05128452926874161, "abs_mean": 0.04145807772874832, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.642280101776123, "elements": 1024, "histogram": { "counts": [ 6, 6, 17, 27, 42, 53, 88, 81, 94, 104, 113, 80, 71, 69, 51, 47, 21, 18, 10, 2 ], "bin_edges": [ -0.13918116688728333, -0.12536735832691193, -0.11155354976654053, -0.09773974120616913, -0.08392593264579773, -0.07011212408542633, -0.05629831552505493, -0.04248450696468353, -0.028670698404312134, -0.014856889843940735, -0.001043081283569336, 0.012770727276802063, 0.026584535837173462, 0.04039834439754486, 0.05421215295791626, 0.06802596151828766, 0.08183977007865906, 0.09565357863903046, 0.10946738719940186, 0.12328121066093445, 0.13709498941898346 ] } }, "transformer.layers.6.3.g": { "min": 0.4672186076641083, "max": 0.9546743631362915, "mean": 0.6688124537467957, "std": 0.05250026285648346, "abs_mean": 0.6688124537467957, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.467771530151367, "elements": 1024, "histogram": { "counts": [ 3, 7, 12, 16, 21, 61, 99, 194, 227, 191, 98, 43, 16, 5, 2, 2, 0, 0, 2, 1 ], "bin_edges": [ 0.4672186076641083, 0.4915913939476013, 0.5159642100334167, 0.5403369665145874, 0.5647097826004028, 0.5890825390815735, 0.6134553551673889, 0.6378281116485596, 0.662200927734375, 0.6865736842155457, 0.7109465003013611, 0.7353192567825317, 0.7596920728683472, 0.7840648889541626, 0.8084376454353333, 0.8328104019165039, 0.8571832180023193, 0.8815560340881348, 0.9059287905693054, 0.9303015470504761, 0.9546743631362915 ] } }, "transformer.layers.6.4.ff.0.0.weight": { "min": -0.32424914836883545, "max": 0.3096342980861664, "mean": -1.5644945960957557e-06, "std": 0.04095214605331421, "abs_mean": 0.03239164128899574, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.86203002929688, "elements": 4194304, "histogram": { "counts": [ 4, 2, 2, 11, 10, 48, 57, 80, 112, 113, 119, 130, 97, 84, 54, 39, 21, 13, 2, 2 ], "bin_edges": [ -0.13788051903247833, -0.12475286424160004, -0.11162521690130234, -0.09849756211042404, -0.08536991477012634, -0.07224225997924805, -0.05911460518836975, -0.04598695784807205, -0.032859303057193756, -0.01973164826631546, -0.006604000926017761, 0.006523653864860535, 0.01965130865573883, 0.032778963446617126, 0.04590660333633423, 0.059034258127212524, 0.07216191291809082, 0.08528956770896912, 0.09841722249984741, 0.11154486238956451, 0.12467251718044281 ] } }, "transformer.layers.6.4.ff.0.0.bias": { "min": -0.12461961060762405, "max": 0.02530832216143608, "mean": -0.03069971315562725, "std": 0.019789544865489006, "abs_mean": 0.03134232759475708, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.337536334991455, "elements": 4096, "histogram": { "counts": [ 1, 2, 1, 7, 13, 15, 17, 34, 62, 87, 123, 129, 141, 120, 102, 60, 54, 18, 11, 3 ], "bin_edges": [ -0.10850232094526291, -0.10194070637226105, -0.09537909924983978, -0.08881748467683792, -0.08225587010383606, -0.0756942629814148, -0.06913264840841293, -0.06257103383541107, -0.05600942671298981, -0.049447815865278244, -0.04288620501756668, -0.03632459044456482, -0.029762975871562958, -0.023201368749141693, -0.01663975417613983, -0.010078147053718567, -0.0035165324807167053, 0.0030450820922851562, 0.009606689214706421, 0.016168303787708282, 0.022729910910129547 ] } }, "transformer.layers.6.4.ff.2.weight": { "min": -0.43944308161735535, "max": 0.4446093440055847, "mean": 9.534660784993321e-05, "std": 0.035124197602272034, "abs_mean": 0.027428196743130684, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 71.92691040039062, "elements": 4194304, "histogram": { "counts": [ 1, 0, 1, 3, 2, 9, 34, 89, 147, 227, 205, 166, 72, 28, 9, 4, 1, 0, 1, 1 ], "bin_edges": [ -0.19963732361793518, -0.1794430911540985, -0.15924887359142303, -0.13905464112758636, -0.11886041611433029, -0.09866619110107422, -0.07847195863723755, -0.05827774107456207, -0.0380835086107254, -0.017889276146888733, 0.002304941415786743, 0.022499173879623413, 0.04269340634346008, 0.06288763880729675, 0.08308184146881104, 0.1032760739326477, 0.12347030639648438, 0.14366453886032104, 0.16385877132415771, 0.184052973985672, 0.20424720644950867 ] } }, "transformer.layers.6.4.ff.2.bias": { "min": -0.22425536811351776, "max": 0.051573775708675385, "mean": -0.001182063017040491, "std": 0.018455415964126587, "abs_mean": 0.01334807462990284, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5914956331253052, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 7, 20, 113, 285, 328, 183, 55, 5 ], "bin_edges": [ -0.22425536811351776, -0.21046391129493713, -0.1966724544763565, -0.18288099765777588, -0.16908954083919525, -0.15529808402061462, -0.141506627202034, -0.12771517038345337, -0.11392371356487274, -0.10013225674629211, -0.08634079992771149, -0.07254934310913086, -0.05875788629055023, -0.044966429471969604, -0.031174972653388977, -0.01738351583480835, -0.003592059016227722, 0.010199397802352905, 0.023990854620933533, 0.037782326340675354, 0.051573775708675385 ] } }, "transformer.layers.7.1.g": { "min": 0.3393731713294983, "max": 0.737841010093689, "mean": 0.5586089491844177, "std": 0.04119626432657242, "abs_mean": 0.5586089491844177, "sparsity": 0.0, "shape": [ 1024 ], "norm": 17.923982620239258, "elements": 1024, "histogram": { "counts": [ 1, 2, 2, 3, 4, 6, 15, 51, 76, 130, 174, 217, 202, 88, 15, 8, 2, 2, 1, 1 ], "bin_edges": [ 0.34397587180137634, 0.36366912722587585, 0.38336238265037537, 0.4030556380748749, 0.4227488934993744, 0.4424421489238739, 0.4621354341506958, 0.4818286895751953, 0.5015219449996948, 0.5212152004241943, 0.5409084558486938, 0.5606017112731934, 0.5802949666976929, 0.5999882221221924, 0.6196814775466919, 0.6393747329711914, 0.6590679883956909, 0.6787612438201904, 0.6984544992446899, 0.7181477546691895, 0.737841010093689 ] } }, "transformer.layers.7.2.to_q.weight": { "min": -0.2723452150821686, "max": 0.2782283425331116, "mean": 1.9915583834517747e-05, "std": 0.04106247052550316, "abs_mean": 0.03229733556509018, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 42.047523498535156, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 0, 1, 4, 7, 20, 34, 65, 128, 151, 175, 156, 113, 77, 40, 15, 7, 6 ], "bin_edges": [ -0.21072475612163544, -0.1939927637577057, -0.17726078629493713, -0.16052879393100739, -0.14379680156707764, -0.12706482410430908, -0.11033283174037933, -0.09360084682703018, -0.07686886191368103, -0.06013686954975128, -0.04340489208698273, -0.02667289972305298, -0.00994090735912323, 0.006791070103645325, 0.023523062467575073, 0.04025505483150482, 0.056987032294273376, 0.07371900975704193, 0.09045101702213287, 0.10718299448490143, 0.12391498684883118 ] } }, "transformer.layers.7.2.to_q.bias": { "min": -0.13683027029037476, "max": 0.1396752893924713, "mean": 0.0004885591333732009, "std": 0.026614630594849586, "abs_mean": 0.018938830122351646, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8513957858085632, "elements": 1024, "histogram": { "counts": [ 1, 2, 4, 1, 5, 9, 22, 64, 164, 241, 249, 147, 53, 17, 5, 4, 5, 3, 2, 2 ], "bin_edges": [ -0.13683027029037476, -0.12300499528646469, -0.10917971283197403, -0.09535443782806396, -0.0815291553735733, -0.06770388036966324, -0.053878605365753174, -0.04005332291126251, -0.026228047907352448, -0.012402772903442383, 0.0014225095510482788, 0.01524779200553894, 0.029073059558868408, 0.04289834201335907, 0.05672362446784973, 0.0705488920211792, 0.08437417447566986, 0.09819945693016052, 0.11202472448348999, 0.12585002183914185, 0.1396752893924713 ] } }, "transformer.layers.7.2.to_k.weight": { "min": -0.49012690782546997, "max": 0.35547417402267456, "mean": 8.882825932232663e-05, "std": 0.04070047289133072, "abs_mean": 0.03201922029256821, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.67690658569336, "elements": 1048576, "histogram": { "counts": [ 5, 3, 9, 17, 24, 43, 58, 65, 76, 118, 133, 132, 104, 70, 50, 46, 19, 17, 9, 2 ], "bin_edges": [ -0.12249059975147247, -0.11079084873199463, -0.09909109771251678, -0.08739134669303894, -0.0756915956735611, -0.06399184465408325, -0.052292101085186005, -0.04059235006570816, -0.028892599046230316, -0.017192848026752472, -0.005493097007274628, 0.00620664656162262, 0.017906397581100464, 0.029606148600578308, 0.04130589962005615, 0.053005650639534, 0.06470540165901184, 0.07640515267848969, 0.08810490369796753, 0.09980465471744537, 0.11150440573692322 ] } }, "transformer.layers.7.2.to_k.bias": { "min": -2.2938547134399414, "max": 1.7426533699035645, "mean": -0.021057037636637688, "std": 0.49975258111953735, "abs_mean": 0.35557305812835693, "sparsity": 0.0, "shape": [ 1024 ], "norm": 15.998469352722168, "elements": 1024, "histogram": { "counts": [ 1, 0, 4, 4, 5, 15, 22, 42, 56, 76, 189, 248, 148, 81, 42, 30, 23, 5, 5, 4 ], "bin_edges": [ -2.2938547134399414, -2.092029333114624, -1.8902039527893066, -1.6883784532546997, -1.4865530729293823, -1.284727692604065, -1.082902193069458, -0.8810768127441406, -0.6792514324188232, -0.47742605209350586, -0.2756006717681885, -0.0737752914428711, 0.1280503273010254, 0.3298757076263428, 0.5317010879516602, 0.7335264682769775, 0.9353518486022949, 1.1371772289276123, 1.3390026092529297, 1.540827989578247, 1.7426533699035645 ] } }, "transformer.layers.7.2.to_v.weight": { "min": -0.21735826134681702, "max": 0.19773884117603302, "mean": -4.063967935508117e-05, "std": 0.03423747047781944, "abs_mean": 0.02707846462726593, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.05868911743164, "elements": 1048576, "histogram": { "counts": [ 2, 7, 5, 22, 46, 92, 123, 143, 160, 134, 111, 77, 38, 19, 11, 6, 1, 1, 1, 1 ], "bin_edges": [ -0.11161135137081146, -0.09862302988767624, -0.08563470840454102, -0.07264638692140579, -0.05965806543827057, -0.046669743955135345, -0.03368142247200012, -0.0206931009888649, -0.007704779505729675, 0.005283541977405548, 0.01827186346054077, 0.0312601774930954, 0.04424850642681122, 0.05723683536052704, 0.07022514939308167, 0.08321346342563629, 0.09620179235935211, 0.10919012129306793, 0.12217843532562256, 0.13516674935817719, 0.148155078291893 ] } }, "transformer.layers.7.2.to_v.bias": { "min": -0.041265569627285004, "max": 0.03861430287361145, "mean": -0.00014519633259624243, "std": 0.012876993976533413, "abs_mean": 0.010816301219165325, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4118887782096863, "elements": 1024, "histogram": { "counts": [ 1, 4, 7, 26, 56, 76, 91, 93, 82, 97, 84, 107, 91, 76, 58, 30, 13, 3, 2, 3 ], "bin_edges": [ -0.03600998595356941, -0.032278772443532944, -0.028547557070851326, -0.024816343560814857, -0.02108512818813324, -0.01735391467809677, -0.013622701168060303, -0.009891485795378685, -0.0061602722853422165, -0.0024290569126605988, 0.0013021565973758698, 0.005033370107412338, 0.008764583617448807, 0.012495797127485275, 0.016227014362812042, 0.01995822787284851, 0.02368944138288498, 0.027420658618211746, 0.031151872128248215, 0.03488308563828468, 0.03861430287361145 ] } }, "transformer.layers.7.2.to_out.0.weight": { "min": -0.17728237807750702, "max": 0.18350861966609955, "mean": 4.7603076382074505e-05, "std": 0.031560394912958145, "abs_mean": 0.02491084672510624, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.31753921508789, "elements": 1048576, "histogram": { "counts": [ 3, 1, 5, 12, 20, 30, 41, 79, 117, 120, 119, 132, 114, 76, 50, 36, 23, 14, 5, 3 ], "bin_edges": [ -0.1022830680012703, -0.09248834103345871, -0.08269362151622772, -0.07289889454841614, -0.06310416758060455, -0.05330944061279297, -0.04351471737027168, -0.0337199941277504, -0.023925267159938812, -0.014130540192127228, -0.004335813224315643, 0.005458906292915344, 0.015253633260726929, 0.025048352777957916, 0.0348430797457695, 0.044637806713581085, 0.05443253368139267, 0.06422726064920425, 0.07402198761701584, 0.08381671458482742, 0.0936114490032196 ] } }, "transformer.layers.7.2.to_out.0.bias": { "min": -0.1796274185180664, "max": 0.18359197676181793, "mean": -0.0022178757935762405, "std": 0.05480958893895149, "abs_mean": 0.04388166591525078, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.7544862031936646, "elements": 1024, "histogram": { "counts": [ 2, 3, 4, 17, 29, 58, 72, 100, 108, 147, 114, 100, 106, 56, 38, 27, 12, 5, 0, 2 ], "bin_edges": [ -0.1796274185180664, -0.16146644949913025, -0.1433054804801941, -0.12514451146125793, -0.10698354244232178, -0.08882257342338562, -0.07066160440444946, -0.052500635385513306, -0.03433966636657715, -0.01617869734764099, 0.001982271671295166, 0.020143240690231323, 0.03830420970916748, 0.05646517872810364, 0.0746261477470398, 0.09278711676597595, 0.11094808578491211, 0.12910905480384827, 0.14727002382278442, 0.16543099284172058, 0.18359197676181793 ] } }, "transformer.layers.7.3.g": { "min": 0.47430306673049927, "max": 1.0235347747802734, "mean": 0.645234227180481, "std": 0.05006485432386398, "abs_mean": 0.645234227180481, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.709495544433594, "elements": 1024, "histogram": { "counts": [ 6, 4, 18, 50, 133, 230, 285, 171, 60, 18, 12, 5, 3, 0, 1, 0, 0, 1, 0, 3 ], "bin_edges": [ 0.47430306673049927, 0.5017646551132202, 0.5292262434959412, 0.5566878318786621, 0.5841494202613831, 0.611611008644104, 0.6390725374221802, 0.6665341854095459, 0.6939957141876221, 0.7214573621749878, 0.748918890953064, 0.7763805389404297, 0.8038420677185059, 0.8313036561012268, 0.8587652444839478, 0.8862268328666687, 0.9136884212493896, 0.9411500096321106, 0.9686115980148315, 0.9960731863975525, 1.0235347747802734 ] } }, "transformer.layers.7.4.ff.0.0.weight": { "min": -0.2717384696006775, "max": 0.3092706799507141, "mean": 0.0001124507180065848, "std": 0.04068849980831146, "abs_mean": 0.032301004976034164, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.32220458984375, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 3, 3, 10, 20, 47, 71, 103, 133, 159, 136, 122, 87, 55, 34, 13, 1, 2 ], "bin_edges": [ -0.18017078936100006, -0.16486860811710358, -0.1495664268732071, -0.13426423072814941, -0.11896204948425293, -0.10365986824035645, -0.08835768699645996, -0.07305549830198288, -0.057753317058086395, -0.042451128363609314, -0.02714894711971283, -0.011846765875816345, 0.003455415368080139, 0.018757596611976624, 0.0340597927570343, 0.049361974000930786, 0.06466415524482727, 0.07996635138988495, 0.09526853263378143, 0.11057071387767792, 0.1258728951215744 ] } }, "transformer.layers.7.4.ff.0.0.bias": { "min": -0.10565188527107239, "max": 0.026852920651435852, "mean": -0.029502389952540398, "std": 0.017905903980135918, "abs_mean": 0.030034106224775314, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.2086341381073, "elements": 4096, "histogram": { "counts": [ 2, 3, 3, 5, 8, 11, 38, 60, 87, 93, 100, 122, 107, 89, 88, 79, 53, 35, 11, 6 ], "bin_edges": [ -0.09144522249698639, -0.08617058396339417, -0.08089595288038254, -0.07562131434679031, -0.07034668326377869, -0.06507204473018646, -0.05979740619659424, -0.05452277138829231, -0.04924813657999039, -0.04397350177168846, -0.038698866963386536, -0.03342422842979431, -0.028149589896202087, -0.02287495881319046, -0.017600320279598236, -0.012325689196586609, -0.007051050662994385, -0.0017764121294021606, 0.0034982189536094666, 0.00877285748720169, 0.014047490432858467 ] } }, "transformer.layers.7.4.ff.2.weight": { "min": -0.33881059288978577, "max": 0.3287763297557831, "mean": 5.716992018278688e-05, "std": 0.03441813588142395, "abs_mean": 0.02711346372961998, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 70.48104095458984, "elements": 4194304, "histogram": { "counts": [ 2, 1, 9, 12, 35, 58, 128, 119, 155, 151, 127, 79, 64, 29, 17, 9, 3, 1, 0, 1 ], "bin_edges": [ -0.11595162004232407, -0.10283190757036209, -0.08971219509840012, -0.07659248262643814, -0.06347277015447617, -0.05035305768251419, -0.037233345210552216, -0.02411363273859024, -0.010993920266628265, 0.0021257922053337097, 0.015245504677295685, 0.02836521714925766, 0.041484929621219635, 0.05460464209318161, 0.06772435456514359, 0.08084406703710556, 0.09396377950906754, 0.10708349198102951, 0.12020320445299149, 0.13332292437553406, 0.14644262194633484 ] } }, "transformer.layers.7.4.ff.2.bias": { "min": -0.1814029961824417, "max": 0.04198184236884117, "mean": -0.0010715797543525696, "std": 0.017202889546751976, "abs_mean": 0.012709951028227806, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5512910485267639, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 12, 43, 134, 259, 270, 188, 70, 19 ], "bin_edges": [ -0.1814029961824417, -0.17023375630378723, -0.15906451642513275, -0.14789527654647827, -0.1367260217666626, -0.1255567967891693, -0.11438754200935364, -0.10321830213069916, -0.09204906225204468, -0.0808798223733902, -0.06971058249473572, -0.05854133516550064, -0.047372087836265564, -0.036202847957611084, -0.025033608078956604, -0.013864368200302124, -0.002695128321647644, 0.008474111557006836, 0.019643351435661316, 0.030812591314315796, 0.04198184236884117 ] } }, "transformer.layers.8.1.g": { "min": 0.32546839118003845, "max": 0.6852879524230957, "mean": 0.5111152529716492, "std": 0.036710962653160095, "abs_mean": 0.5111152529716492, "sparsity": 0.0, "shape": [ 1024 ], "norm": 16.397781372070312, "elements": 1024, "histogram": { "counts": [ 2, 0, 4, 2, 3, 9, 28, 67, 112, 187, 208, 192, 99, 62, 16, 6, 2, 0, 0, 1 ], "bin_edges": [ 0.32546839118003845, 0.3434593677520752, 0.36145034432411194, 0.3794413208961487, 0.3974322974681854, 0.41542327404022217, 0.4334142804145813, 0.45140522718429565, 0.4693962335586548, 0.48738718032836914, 0.5053781867027283, 0.5233691334724426, 0.5413601398468018, 0.5593510866165161, 0.5773420929908752, 0.5953330993652344, 0.6133240461349487, 0.6313149929046631, 0.6493059992790222, 0.6672970056533813, 0.6852879524230957 ] } }, "transformer.layers.8.2.to_q.weight": { "min": -0.23360855877399445, "max": 0.22551532089710236, "mean": -3.5930093872593716e-05, "std": 0.039181701838970184, "abs_mean": 0.031050506979227066, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.12158203125, "elements": 1048576, "histogram": { "counts": [ 3, 4, 15, 30, 25, 51, 70, 80, 125, 135, 101, 115, 90, 71, 33, 23, 12, 13, 3, 1 ], "bin_edges": [ -0.11901972442865372, -0.10700470209121704, -0.09498967230319977, -0.08297464996576309, -0.07095962762832642, -0.05894460529088974, -0.04692957550287247, -0.03491455316543579, -0.022899530827999115, -0.010884508490562439, 0.001130513846874237, 0.01314554363489151, 0.025160573422908783, 0.03717558830976486, 0.049190618097782135, 0.061205632984638214, 0.07322066277265549, 0.08523569256067276, 0.09725070744752884, 0.10926573723554611, 0.1212807446718216 ] } }, "transformer.layers.8.2.to_q.bias": { "min": -0.11516069620847702, "max": 0.13141536712646484, "mean": 0.00015141721814870834, "std": 0.02916705049574375, "abs_mean": 0.02083246223628521, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9329023361206055, "elements": 1024, "histogram": { "counts": [ 4, 5, 5, 9, 11, 24, 50, 121, 207, 199, 183, 101, 35, 20, 6, 6, 4, 2, 5, 3 ], "bin_edges": [ -0.11516069620847702, -0.1028318926692009, -0.09050308912992477, -0.07817428559064865, -0.06584548205137253, -0.053516678512096405, -0.04118787497282028, -0.02885907143354416, -0.016530267894268036, -0.004201464354991913, 0.00812733918428421, 0.020456142723560333, 0.032784946262836456, 0.04511374980211258, 0.0574425533413887, 0.06977135688066483, 0.08210016041994095, 0.09442896395921707, 0.1067577674984932, 0.11908657103776932, 0.13141536712646484 ] } }, "transformer.layers.8.2.to_k.weight": { "min": -0.3523465394973755, "max": 0.2849816083908081, "mean": 7.249596819747239e-06, "std": 0.039250195026397705, "abs_mean": 0.031097358092665672, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.191776275634766, "elements": 1048576, "histogram": { "counts": [ 8, 14, 21, 31, 63, 85, 122, 125, 149, 116, 88, 68, 57, 28, 13, 5, 5, 1, 0, 1 ], "bin_edges": [ -0.10888364911079407, -0.09555511176586151, -0.08222658187150955, -0.068898044526577, -0.05556951090693474, -0.04224097728729248, -0.028912439942359924, -0.015583910048007965, -0.002255372703075409, 0.011073164641857147, 0.024401694536209106, 0.03773023188114166, 0.05105876922607422, 0.06438730657100677, 0.07771582901477814, 0.0910443663597107, 0.10437290370464325, 0.1177014410495758, 0.13102997839450836, 0.14435851573944092, 0.15768705308437347 ] } }, "transformer.layers.8.2.to_k.bias": { "min": -4.126643180847168, "max": 3.538667678833008, "mean": -0.011556778103113174, "std": 0.681910514831543, "abs_mean": 0.39681142568588257, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.813613891601562, "elements": 1024, "histogram": { "counts": [ 2, 3, 1, 1, 8, 3, 9, 25, 31, 97, 455, 247, 54, 30, 12, 8, 8, 3, 1, 2 ], "bin_edges": [ -4.126643180847168, -3.743377685546875, -3.360112190246582, -2.97684645652771, -2.593580961227417, -2.210315465927124, -1.827049732208252, -1.443784236907959, -1.060518741607666, -0.677253246307373, -0.2939877510070801, 0.08927774429321289, 0.47254371643066406, 0.855809211730957, 1.23907470703125, 1.622340202331543, 2.005605697631836, 2.388871192932129, 2.772136688232422, 3.155402183532715, 3.538667678833008 ] } }, "transformer.layers.8.2.to_v.weight": { "min": -0.2112656831741333, "max": 0.20894697308540344, "mean": 3.47470777342096e-05, "std": 0.03448949381709099, "abs_mean": 0.027210766449570656, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.31674575805664, "elements": 1048576, "histogram": { "counts": [ 2, 2, 8, 11, 10, 25, 52, 79, 113, 131, 149, 116, 107, 68, 55, 31, 18, 16, 3, 4 ], "bin_edges": [ -0.11793059855699539, -0.10670945793390274, -0.09548831731081009, -0.08426717668771744, -0.07304603606462479, -0.061824895441532135, -0.050603754818439484, -0.03938261419534683, -0.02816147357225418, -0.01694033294916153, -0.005719192326068878, 0.005501948297023773, 0.016723088920116425, 0.027944229543209076, 0.03916537016630173, 0.05038651078939438, 0.06160765141248703, 0.07282879203557968, 0.08404993265867233, 0.09527107328176498, 0.10649221390485764 ] } }, "transformer.layers.8.2.to_v.bias": { "min": -0.03565378487110138, "max": 0.0480014868080616, "mean": 0.0007942374795675278, "std": 0.012850471772253513, "abs_mean": 0.010666023939847946, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4117993414402008, "elements": 1024, "histogram": { "counts": [ 1, 8, 20, 33, 69, 78, 104, 101, 113, 106, 112, 106, 68, 50, 20, 5, 4, 1, 0, 1 ], "bin_edges": [ -0.03565378487110138, -0.031471021473407745, -0.02728825807571411, -0.023105494678020477, -0.018922731280326843, -0.01473996788263321, -0.010557204484939575, -0.006374441087245941, -0.002191677689552307, 0.001991085708141327, 0.006173849105834961, 0.010356612503528595, 0.014539375901222229, 0.018722139298915863, 0.022904902696609497, 0.02708766609430313, 0.031270429491996765, 0.0354531928896904, 0.03963595628738403, 0.04381871968507767, 0.0480014868080616 ] } }, "transformer.layers.8.2.to_out.0.weight": { "min": -0.21031072735786438, "max": 0.19297289848327637, "mean": -1.2874927506345557e-06, "std": 0.03169998526573181, "abs_mean": 0.02499360963702202, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.46042251586914, "elements": 1048576, "histogram": { "counts": [ 3, 5, 7, 25, 48, 66, 109, 116, 140, 118, 128, 87, 63, 34, 22, 16, 5, 4, 3, 1 ], "bin_edges": [ -0.09405967593193054, -0.08362150937318802, -0.07318335026502609, -0.06274518370628357, -0.052307020872831345, -0.04186885803937912, -0.0314306914806366, -0.02099253237247467, -0.010554365813732147, -0.00011619925498962402, 0.010321959853172302, 0.020760126411914825, 0.03119829297065735, 0.041636452078819275, 0.0520746111869812, 0.06251278519630432, 0.07295094430446625, 0.08338910341262817, 0.0938272774219513, 0.10426543653011322, 0.11470360308885574 ] } }, "transformer.layers.8.2.to_out.0.bias": { "min": -0.18637274205684662, "max": 0.17692941427230835, "mean": -0.0028488910757005215, "std": 0.05860321223735809, "abs_mean": 0.04695521295070648, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.8766025304794312, "elements": 1024, "histogram": { "counts": [ 5, 0, 8, 13, 27, 45, 71, 90, 106, 130, 123, 102, 91, 74, 49, 26, 21, 13, 3, 3 ], "bin_edges": [ -0.18637274205684662, -0.16820763051509857, -0.15004253387451172, -0.13187742233276367, -0.11371231079101562, -0.09554719924926758, -0.07738209515810013, -0.05921699106693268, -0.04105187952518463, -0.022886767983436584, -0.004721656441688538, 0.013443440198898315, 0.03160855174064636, 0.04977366328239441, 0.06793875992298126, 0.08610387146472931, 0.10426898300647736, 0.1224340945482254, 0.14059920608997345, 0.1587643176317215, 0.17692941427230835 ] } }, "transformer.layers.8.3.g": { "min": 0.47467249631881714, "max": 1.0397725105285645, "mean": 0.6513394117355347, "std": 0.049329087138175964, "abs_mean": 0.6513394117355347, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.90249252319336, "elements": 1024, "histogram": { "counts": [ 2, 5, 8, 29, 165, 235, 313, 144, 46, 22, 11, 7, 5, 5, 1, 0, 0, 0, 0, 2 ], "bin_edges": [ 0.47467249631881714, 0.5029274821281433, 0.5311825275421143, 0.5594375133514404, 0.5876924991607666, 0.6159474849700928, 0.644202470779419, 0.6724575161933899, 0.7007125020027161, 0.7289674878120422, 0.7572225332260132, 0.7854775190353394, 0.8137325048446655, 0.8419874906539917, 0.8702424764633179, 0.8984975218772888, 0.926752507686615, 0.9550074934959412, 0.9832624793052673, 1.0115175247192383, 1.0397725105285645 ] } }, "transformer.layers.8.4.ff.0.0.weight": { "min": -0.248422771692276, "max": 0.32902756333351135, "mean": 0.00018066739721689373, "std": 0.04057690501213074, "abs_mean": 0.03225279226899147, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.09464263916016, "elements": 4194304, "histogram": { "counts": [ 3, 3, 5, 19, 21, 33, 67, 83, 87, 106, 120, 134, 104, 77, 59, 35, 23, 16, 2, 3 ], "bin_edges": [ -0.132222980260849, -0.11964092403650284, -0.10705886781215668, -0.09447681903839111, -0.08189476281404495, -0.06931270658969879, -0.05673065781593323, -0.04414860159158707, -0.031566545367240906, -0.018984489142894745, -0.006402432918548584, 0.00617961585521698, 0.018761664628982544, 0.0313437283039093, 0.043925777077674866, 0.056507840752601624, 0.06908988952636719, 0.08167193830013275, 0.09425400197505951, 0.10683605074882507, 0.11941809952259064 ] } }, "transformer.layers.8.4.ff.0.0.bias": { "min": -0.12427264451980591, "max": 0.024594629183411598, "mean": -0.030488643795251846, "std": 0.017578164115548134, "abs_mean": 0.031011048704385757, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.2522852420806885, "elements": 4096, "histogram": { "counts": [ 1, 2, 3, 7, 16, 26, 69, 79, 110, 123, 111, 126, 104, 95, 55, 44, 14, 8, 4, 3 ], "bin_edges": [ -0.093555748462677, -0.08764822781085968, -0.08174070715904236, -0.07583319395780563, -0.06992567330598831, -0.06401815265417099, -0.05811063572764397, -0.05220311880111694, -0.04629559814929962, -0.0403880774974823, -0.034480560570955276, -0.028573043644428253, -0.02266552299261093, -0.01675800234079361, -0.010850489139556885, -0.004942968487739563, 0.0009645521640777588, 0.006872072815895081, 0.012779593467712402, 0.018687106668949127, 0.024594629183411598 ] } }, "transformer.layers.8.4.ff.2.weight": { "min": -0.4205840826034546, "max": 0.4813268184661865, "mean": 2.129650965798646e-06, "std": 0.035403117537498474, "abs_mean": 0.027970315888524055, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 72.49813079833984, "elements": 4194304, "histogram": { "counts": [ 4, 8, 18, 34, 62, 76, 122, 128, 138, 117, 113, 72, 46, 36, 9, 10, 3, 3, 0, 1 ], "bin_edges": [ -0.10240344703197479, -0.09038490056991577, -0.07836635410785675, -0.06634780019521713, -0.05432925373315811, -0.04231070727109909, -0.030292153358459473, -0.01827360689640045, -0.006255060434341431, 0.00576348602771759, 0.01778203248977661, 0.029800578951835632, 0.04181914031505585, 0.05383768677711487, 0.06585623323917389, 0.07787477970123291, 0.08989332616329193, 0.10191187262535095, 0.11393041908740997, 0.125948965549469, 0.1379675269126892 ] } }, "transformer.layers.8.4.ff.2.bias": { "min": -0.15161579847335815, "max": 0.043303120881319046, "mean": 3.9640130125917494e-05, "std": 0.014866231009364128, "abs_mean": 0.011253134347498417, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.47548872232437134, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 34, 83, 214, 277, 225, 103, 42, 10 ], "bin_edges": [ -0.15161579847335815, -0.14186985790729523, -0.1321239024400711, -0.12237796187400818, -0.11263201385736465, -0.10288606584072113, -0.0931401252746582, -0.08339417725801468, -0.07364822924137115, -0.06390228122472763, -0.054156333208084106, -0.04441039264202118, -0.034664444625377655, -0.02491849660873413, -0.015172556042671204, -0.0054266005754470825, 0.004319339990615845, 0.014065280556678772, 0.023811236023902893, 0.03355717658996582, 0.043303120881319046 ] } }, "transformer.layers.9.1.g": { "min": 0.3155551552772522, "max": 0.6806549429893494, "mean": 0.5528165102005005, "std": 0.04051704332232475, "abs_mean": 0.5528165102005005, "sparsity": 0.0, "shape": [ 1024 ], "norm": 17.737529754638672, "elements": 1024, "histogram": { "counts": [ 3, 1, 0, 1, 2, 6, 7, 8, 19, 32, 61, 115, 178, 227, 186, 114, 30, 6, 3, 1 ], "bin_edges": [ 0.3155551552772522, 0.33381015062332153, 0.35206514596939087, 0.3703201413154602, 0.38857510685920715, 0.4068301022052765, 0.4250850975513458, 0.4433400630950928, 0.4615950584411621, 0.47985005378723145, 0.4981050491333008, 0.5163600444793701, 0.5346150398254395, 0.5528700351715088, 0.5711250305175781, 0.5893800258636475, 0.6076350212097168, 0.6258900165557861, 0.6441450119018555, 0.66239994764328, 0.6806549429893494 ] } }, "transformer.layers.9.2.to_q.weight": { "min": -0.2062118798494339, "max": 0.21964126825332642, "mean": 3.0860355764161795e-05, "std": 0.038303423672914505, "abs_mean": 0.030420470982789993, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.222164154052734, "elements": 1048576, "histogram": { "counts": [ 5, 2, 11, 16, 24, 47, 70, 100, 136, 138, 143, 101, 77, 56, 34, 31, 5, 2, 0, 2 ], "bin_edges": [ -0.12023625522851944, -0.10769837349653244, -0.09516048431396484, -0.08262260258197784, -0.07008472084999084, -0.057546839118003845, -0.04500894993543625, -0.03247106820344925, -0.01993318647146225, -0.00739530473947525, 0.005142576992511749, 0.01768045872449875, 0.030218355357646942, 0.04275623708963394, 0.05529411882162094, 0.06783200055360794, 0.08036988228559494, 0.09290776401758194, 0.10544564574956894, 0.11798352748155594, 0.13052140176296234 ] } }, "transformer.layers.9.2.to_q.bias": { "min": -0.1376407891511917, "max": 0.11259414255619049, "mean": 2.069001493509859e-05, "std": 0.02579990215599537, "abs_mean": 0.01859201118350029, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8251938819885254, "elements": 1024, "histogram": { "counts": [ 1, 1, 2, 3, 2, 6, 7, 29, 72, 151, 221, 233, 145, 77, 20, 12, 6, 5, 3, 4 ], "bin_edges": [ -0.1376407891511917, -0.12512904405593872, -0.11261729896068573, -0.10010554641485214, -0.08759380131959915, -0.07508205622434616, -0.06257030367851257, -0.05005855858325958, -0.03754681348800659, -0.0250350683927536, -0.01252332329750061, -1.1578202247619629e-05, 0.012500181794166565, 0.025011926889419556, 0.037523671984672546, 0.05003541707992554, 0.06254716217517853, 0.07505890727043152, 0.08757065236568451, 0.1000823974609375, 0.11259414255619049 ] } }, "transformer.layers.9.2.to_k.weight": { "min": -0.40213435888290405, "max": 0.3705216944217682, "mean": 2.6252395400661044e-05, "std": 0.03818526491522789, "abs_mean": 0.030293822288513184, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.10115051269531, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 4, 14, 24, 53, 66, 91, 122, 125, 142, 119, 87, 58, 35, 27, 17, 11, 4 ], "bin_edges": [ -0.13659250736236572, -0.12421666085720062, -0.11184081435203552, -0.09946496039628983, -0.08708911389112473, -0.07471326738595963, -0.06233741343021393, -0.04996156692504883, -0.03758572041988373, -0.025209873914718628, -0.012834027409553528, -0.00045818090438842773, 0.011917680501937866, 0.024293527007102966, 0.036669373512268066, 0.049045220017433167, 0.06142106652259827, 0.07379691302776337, 0.08617275953292847, 0.09854860603809357, 0.11092444509267807 ] } }, "transformer.layers.9.2.to_k.bias": { "min": -3.765413761138916, "max": 2.86456298828125, "mean": 0.0011342763900756836, "std": 0.5163310766220093, "abs_mean": 0.3170565068721771, "sparsity": 0.0, "shape": [ 1024 ], "norm": 16.514562606811523, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 1, 2, 6, 3, 6, 23, 64, 221, 446, 145, 39, 21, 9, 3, 2, 6, 2 ], "bin_edges": [ -3.765413761138916, -3.43391489982605, -3.1024160385131836, -2.7709174156188965, -2.439418315887451, -2.107919692993164, -1.7764208316802979, -1.4449219703674316, -1.1134231090545654, -0.7819242477416992, -0.450425386428833, -0.1189265251159668, 0.2125720977783203, 0.5440711975097656, 0.8755698204040527, 1.207068920135498, 1.5385675430297852, 1.8700661659240723, 2.2015652656555176, 2.5330638885498047, 2.86456298828125 ] } }, "transformer.layers.9.2.to_v.weight": { "min": -0.20278441905975342, "max": 0.1972842514514923, "mean": 2.9531782274716534e-05, "std": 0.034300558269023895, "abs_mean": 0.027055270969867706, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.123321533203125, "elements": 1048576, "histogram": { "counts": [ 2, 1, 4, 9, 21, 35, 57, 60, 83, 141, 126, 153, 115, 89, 43, 31, 17, 10, 1, 2 ], "bin_edges": [ -0.12183795869350433, -0.11040417850017548, -0.09897040575742722, -0.08753662556409836, -0.0761028528213501, -0.06466907262802124, -0.05323529243469238, -0.04180151969194412, -0.030367739498615265, -0.018933959305286407, -0.007500186562538147, 0.0039335936307907104, 0.015367373824119568, 0.026801154017448425, 0.03823491930961609, 0.049668699502944946, 0.061102479696273804, 0.07253625988960266, 0.08397004008293152, 0.09540380537509918, 0.10683758556842804 ] } }, "transformer.layers.9.2.to_v.bias": { "min": -0.05089922249317169, "max": 0.03997639939188957, "mean": -0.00041936602792702615, "std": 0.013420597650110722, "abs_mean": 0.01112040039151907, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4294591248035431, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 4, 7, 21, 46, 88, 98, 116, 98, 118, 117, 107, 75, 65, 22, 12, 2, 3 ], "bin_edges": [ -0.05089922249317169, -0.046355441212654114, -0.041811659932136536, -0.03726787865161896, -0.03272409737110138, -0.0281803160905838, -0.023636534810066223, -0.019092753529548645, -0.014548972249031067, -0.010005190968513489, -0.005461409687995911, -0.0009176284074783325, 0.0036261528730392456, 0.008169934153556824, 0.012713715434074402, 0.01725749671459198, 0.021801277995109558, 0.026345059275627136, 0.030888840556144714, 0.03543262183666229, 0.03997639939188957 ] } }, "transformer.layers.9.2.to_out.0.weight": { "min": -0.19621425867080688, "max": 0.20147208869457245, "mean": -1.2328569937380962e-05, "std": 0.0318082757294178, "abs_mean": 0.02506079152226448, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 32.571311950683594, "elements": 1048576, "histogram": { "counts": [ 1, 2, 0, 1, 3, 11, 16, 54, 100, 160, 153, 179, 142, 79, 62, 22, 11, 2, 1, 1 ], "bin_edges": [ -0.1516515016555786, -0.1376262605190277, -0.123601034283638, -0.1095757931470871, -0.09555055946111679, -0.08152532577514648, -0.06750008463859558, -0.053474850952625275, -0.03944961726665497, -0.025424376130104065, -0.011399149894714355, 0.002626091241836548, 0.01665133237838745, 0.03067655861377716, 0.044701799750328064, 0.05872702598571777, 0.07275226712226868, 0.08677750825881958, 0.10080274939537048, 0.114827960729599, 0.1288532018661499 ] } }, "transformer.layers.9.2.to_out.0.bias": { "min": -0.19283677637577057, "max": 0.1948237270116806, "mean": -0.002969849156215787, "std": 0.06253352016210556, "abs_mean": 0.050027262419462204, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.002351760864258, "elements": 1024, "histogram": { "counts": [ 5, 4, 8, 16, 37, 49, 81, 89, 116, 124, 111, 112, 82, 67, 41, 21, 28, 5, 1, 3 ], "bin_edges": [ -0.19283677637577057, -0.17345374822616577, -0.15407073497772217, -0.13468770682811737, -0.11530467867851257, -0.09592165052890778, -0.07653862982988358, -0.057155609130859375, -0.03777258098125458, -0.01838955283164978, 0.000993475317955017, 0.02037648856639862, 0.03975951671600342, 0.059142544865608215, 0.07852555811405182, 0.09790860116481781, 0.11729161441326141, 0.13667462766170502, 0.156057670712471, 0.1754406839609146, 0.1948237270116806 ] } }, "transformer.layers.9.3.g": { "min": 0.34950727224349976, "max": 1.081899642944336, "mean": 0.6671000123023987, "std": 0.05490493029356003, "abs_mean": 0.6671000123023987, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.419307708740234, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 0, 2, 9, 54, 230, 383, 209, 60, 21, 13, 8, 3, 2, 0, 0, 1, 2 ], "bin_edges": [ 0.34950727224349976, 0.38612687587738037, 0.4227465093135834, 0.4593661427497864, 0.495985746383667, 0.5326053500175476, 0.5692249536514282, 0.6058446168899536, 0.6424642205238342, 0.6790838241577148, 0.7157034873962402, 0.7523230910301208, 0.7889426946640015, 0.8255622982978821, 0.8621819615364075, 0.8988015651702881, 0.9354211688041687, 0.9720407724380493, 1.0086603164672852, 1.0452799797058105, 1.081899642944336 ] } }, "transformer.layers.9.4.ff.0.0.weight": { "min": -0.22493921220302582, "max": 0.2511034309864044, "mean": 0.0003591308486647904, "std": 0.04076593369245529, "abs_mean": 0.03243311122059822, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.48428344726562, "elements": 4194304, "histogram": { "counts": [ 4, 3, 7, 12, 22, 55, 71, 91, 127, 144, 126, 112, 96, 46, 38, 26, 13, 5, 1, 1 ], "bin_edges": [ -0.1372128278017044, -0.1232616975903511, -0.1093105673789978, -0.0953594297170639, -0.0814082995057106, -0.0674571692943573, -0.0535060316324234, -0.0395549014210701, -0.025603771209716797, -0.011652633547782898, 0.002298489212989807, 0.016249626874923706, 0.030200764536857605, 0.04415188729763031, 0.05810302495956421, 0.07205414772033691, 0.08600528538227081, 0.09995642304420471, 0.11390756070613861, 0.12785868346691132, 0.14180982112884521 ] } }, "transformer.layers.9.4.ff.0.0.bias": { "min": -0.09088904410600662, "max": 0.04371574521064758, "mean": -0.030075963586568832, "std": 0.01758558303117752, "abs_mean": 0.030652616173028946, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.229682445526123, "elements": 4096, "histogram": { "counts": [ 2, 7, 8, 11, 25, 52, 87, 108, 124, 135, 127, 114, 87, 52, 36, 16, 5, 1, 1, 2 ], "bin_edges": [ -0.09088904410600662, -0.08454560488462448, -0.07820217311382294, -0.0718587338924408, -0.06551529467105865, -0.05917186290025711, -0.05282842367887497, -0.04648498818278313, -0.040141552686691284, -0.03379811719059944, -0.0274546816945076, -0.021111242473125458, -0.014767803251743317, -0.008424371480941772, -0.0020809322595596313, 0.004262499511241913, 0.010605938732624054, 0.016949377954006195, 0.02329280972480774, 0.02963624894618988, 0.03597967326641083 ] } }, "transformer.layers.9.4.ff.2.weight": { "min": -0.35314324498176575, "max": 0.303651362657547, "mean": -4.348178117652424e-05, "std": 0.03712818771600723, "abs_mean": 0.029329190030694008, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 76.03089904785156, "elements": 4194304, "histogram": { "counts": [ 1, 4, 0, 7, 13, 36, 53, 71, 88, 107, 154, 126, 119, 99, 65, 31, 16, 6, 3, 1 ], "bin_edges": [ -0.13770392537117004, -0.12496820092201233, -0.11223247647285461, -0.0994967445731163, -0.08676102012395859, -0.07402529567480087, -0.06128956377506256, -0.048553839325904846, -0.03581811487674713, -0.023082390427589417, -0.010346665978431702, 0.002389058470726013, 0.015124797821044922, 0.027860522270202637, 0.04059624671936035, 0.053331971168518066, 0.06606769561767578, 0.0788034200668335, 0.09153914451599121, 0.10427486896514893, 0.11701059341430664 ] } }, "transformer.layers.9.4.ff.2.bias": { "min": -0.16180230677127838, "max": 0.0634349063038826, "mean": -8.249300299212337e-05, "std": 0.019394585862755775, "abs_mean": 0.01485530100762844, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.6203292012214661, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 16, 47, 127, 215, 238, 190, 103, 44, 10, 3 ], "bin_edges": [ -0.16180230677127838, -0.15054044127464294, -0.1392785906791687, -0.12801672518253326, -0.11675485968589783, -0.10549300163984299, -0.09423114359378815, -0.08296927809715271, -0.07170742005109787, -0.06044556200504303, -0.04918369650840759, -0.03792183846235275, -0.026659980416297913, -0.015398114919662476, -0.004136249423027039, 0.007125601172447205, 0.01838746666908264, 0.02964933216571808, 0.04091118276119232, 0.05217304825782776, 0.0634349063038826 ] } }, "transformer.layers.10.1.g": { "min": 0.34883353114128113, "max": 0.7206243872642517, "mean": 0.5422865748405457, "std": 0.03884800896048546, "abs_mean": 0.5422865748405457, "sparsity": 0.0, "shape": [ 1024 ], "norm": 17.39759635925293, "elements": 1024, "histogram": { "counts": [ 2, 1, 1, 7, 9, 15, 16, 52, 106, 161, 227, 178, 164, 39, 13, 4, 2, 2, 0, 1 ], "bin_edges": [ 0.34883353114128113, 0.36742308735847473, 0.38601261377334595, 0.40460216999053955, 0.42319169640541077, 0.44178125262260437, 0.4603707790374756, 0.4789603352546692, 0.4975498914718628, 0.5161393880844116, 0.5347289443016052, 0.5533185005187988, 0.5719080567359924, 0.590497612953186, 0.6090871095657349, 0.6276766657829285, 0.6462662220001221, 0.6648557782173157, 0.6834453344345093, 0.7020348310470581, 0.7206243872642517 ] } }, "transformer.layers.10.2.to_q.weight": { "min": -0.21920670568943024, "max": 0.22291362285614014, "mean": -1.1165878277097363e-05, "std": 0.039236169308423996, "abs_mean": 0.03101443126797676, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.177345275878906, "elements": 1048576, "histogram": { "counts": [ 1, 7, 9, 18, 34, 65, 69, 124, 104, 134, 113, 95, 74, 64, 40, 18, 13, 8, 7, 3 ], "bin_edges": [ -0.11750198900699615, -0.10525693744421005, -0.09301188588142395, -0.08076683431863785, -0.06852178275585175, -0.05627673119306564, -0.04403167963027954, -0.03178662806749344, -0.019541576504707336, -0.007296524941921234, 0.004948526620864868, 0.017193570733070374, 0.029438629746437073, 0.04168368875980377, 0.05392873287200928, 0.06617377698421478, 0.07841883599758148, 0.09066389501094818, 0.10290893912315369, 0.11515398323535919, 0.1273990422487259 ] } }, "transformer.layers.10.2.to_q.bias": { "min": -0.11826413869857788, "max": 0.17058128118515015, "mean": 0.0002835137420333922, "std": 0.02510087564587593, "abs_mean": 0.017988789826631546, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8028870820999146, "elements": 1024, "histogram": { "counts": [ 3, 1, 2, 9, 14, 44, 131, 233, 278, 180, 62, 23, 10, 1, 5, 1, 1, 1, 0, 1 ], "bin_edges": [ -0.11826413869857788, -0.10382186621427536, -0.08937959372997284, -0.07493732869625092, -0.060495056211948395, -0.046052783727645874, -0.03161051869392395, -0.01716824620962143, -0.0027259737253189087, 0.011716291308403015, 0.026158571243286133, 0.04060083627700806, 0.05504310131072998, 0.0694853812456131, 0.08392764627933502, 0.09836992621421814, 0.11281219124794006, 0.127254456281662, 0.1416967213153839, 0.15613901615142822, 0.17058128118515015 ] } }, "transformer.layers.10.2.to_k.weight": { "min": -0.2464587390422821, "max": 0.3006129264831543, "mean": -3.662023664219305e-05, "std": 0.03893572464585304, "abs_mean": 0.030795468017458916, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.86971664428711, "elements": 1048576, "histogram": { "counts": [ 1, 1, 1, 2, 13, 22, 27, 48, 84, 117, 142, 154, 127, 100, 77, 41, 24, 10, 5, 4 ], "bin_edges": [ -0.166384756565094, -0.15166360139846802, -0.13694246113300323, -0.12222130596637726, -0.10750015825033188, -0.0927790105342865, -0.07805785536766052, -0.06333670765161514, -0.04861555993556976, -0.03389440476894379, -0.019173264503479004, -0.004452109336853027, 0.01026904582977295, 0.024990186095237732, 0.03971134126186371, 0.05443248152732849, 0.06915363669395447, 0.08387479186058044, 0.09859594702720642, 0.11331707239151001, 0.1280382126569748 ] } }, "transformer.layers.10.2.to_k.bias": { "min": -3.4999661445617676, "max": 3.709076166152954, "mean": 0.015840880572795868, "std": 0.7814859747886658, "abs_mean": 0.422378271818161, "sparsity": 0.0, "shape": [ 1024 ], "norm": 25.00047492980957, "elements": 1024, "histogram": { "counts": [ 5, 3, 10, 12, 5, 7, 14, 25, 122, 453, 230, 43, 24, 10, 7, 7, 6, 7, 6, 4 ], "bin_edges": [ -3.4999661445617676, -3.1395139694213867, -2.779061794281006, -2.418609857559204, -2.0581576824188232, -1.6977055072784424, -1.3372535705566406, -0.9768013954162598, -0.6163492202758789, -0.25589704513549805, 0.10455513000488281, 0.46500706672668457, 0.8254590034484863, 1.1859111785888672, 1.546363353729248, 1.906815528869629, 2.2672677040100098, 2.6277198791503906, 2.9881720542907715, 3.3486242294311523, 3.709076166152954 ] } }, "transformer.layers.10.2.to_v.weight": { "min": -0.2185182124376297, "max": 0.23746132850646973, "mean": -1.3619632227346301e-05, "std": 0.03630794584751129, "abs_mean": 0.028688525781035423, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 37.17878723144531, "elements": 1048576, "histogram": { "counts": [ 6, 7, 15, 26, 37, 65, 85, 112, 136, 134, 136, 99, 60, 44, 22, 6, 6, 1, 2, 1 ], "bin_edges": [ -0.1074351966381073, -0.09535156935453415, -0.083267942070961, -0.07118430733680725, -0.0591006837785244, -0.04701705649495125, -0.0349334254860878, -0.02284979820251465, -0.010766170918941498, 0.0013174563646316528, 0.013401083648204803, 0.025484710931777954, 0.0375683456659317, 0.049651965498924255, 0.061735600233078, 0.07381922006607056, 0.0859028548002243, 0.09798648953437805, 0.1100701093673706, 0.12215374410152435, 0.1342373639345169 ] } }, "transformer.layers.10.2.to_v.bias": { "min": -0.04712348431348801, "max": 0.05133059248328209, "mean": 0.00048102246364578605, "std": 0.01351132895797491, "abs_mean": 0.011189396493136883, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.43242543935775757, "elements": 1024, "histogram": { "counts": [ 2, 0, 1, 8, 17, 48, 73, 112, 100, 145, 88, 122, 125, 88, 43, 16, 8, 1, 0, 3 ], "bin_edges": [ -0.04712348431348801, -0.042412735521793365, -0.037701983004808426, -0.032991234213113785, -0.028280483558773994, -0.023569732904434204, -0.018858984112739563, -0.014148231595754623, -0.009437482804059982, -0.004726734012365341, -1.598149538040161e-05, 0.0046947672963142395, 0.00940551608800888, 0.01411626860499382, 0.01882702112197876, 0.0235377699136734, 0.028248518705368042, 0.03295926749706268, 0.037670016288757324, 0.04238077253103256, 0.0470915250480175 ] } }, "transformer.layers.10.2.to_out.0.weight": { "min": -0.21373434364795685, "max": 0.2173190861940384, "mean": 5.650868115480989e-05, "std": 0.033619917929172516, "abs_mean": 0.026529820635914803, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 34.42643737792969, "elements": 1048576, "histogram": { "counts": [ 2, 0, 4, 11, 29, 51, 71, 109, 127, 145, 154, 123, 85, 35, 33, 7, 7, 6, 0, 1 ], "bin_edges": [ -0.12208394706249237, -0.10931171476840973, -0.09653948247432709, -0.08376725018024445, -0.0709950178861618, -0.05822278559207916, -0.04545055329799652, -0.03267832100391388, -0.019906088709831238, -0.007133856415748596, 0.005638375878334045, 0.018410608172416687, 0.03118284046649933, 0.04395507276058197, 0.05672730505466461, 0.06949953734874725, 0.0822717696428299, 0.09504400193691254, 0.10781623423099518, 0.12058846652507782, 0.13336071372032166 ] } }, "transformer.layers.10.2.to_out.0.bias": { "min": -0.21108141541481018, "max": 0.23115544021129608, "mean": -0.005106039810925722, "std": 0.06184696406126022, "abs_mean": 0.04949750006198883, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.984872817993164, "elements": 1024, "histogram": { "counts": [ 2, 2, 6, 14, 38, 66, 84, 101, 142, 156, 111, 116, 79, 38, 22, 12, 9, 1, 0, 1 ], "bin_edges": [ -0.21108141541481018, -0.18896956741809845, -0.1668577343225479, -0.14474588632583618, -0.12263404577970505, -0.10052220523357391, -0.07841035723686218, -0.056298524141311646, -0.034186676144599915, -0.012074828147888184, 0.010037004947662354, 0.032148852944374084, 0.054260700941085815, 0.07637253403663635, 0.09848436713218689, 0.12059623003005981, 0.14270806312561035, 0.1648198962211609, 0.1869317591190338, 0.20904359221458435, 0.23115544021129608 ] } }, "transformer.layers.10.3.g": { "min": 0.36205485463142395, "max": 1.099104642868042, "mean": 0.6992122530937195, "std": 0.05326760187745094, "abs_mean": 0.6992122530937195, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.439565658569336, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 2, 0, 3, 26, 114, 352, 309, 129, 32, 16, 7, 0, 3, 2, 2, 1, 1 ], "bin_edges": [ 0.36205485463142395, 0.398907333612442, 0.43575984239578247, 0.47261232137680054, 0.5094648003578186, 0.5463173389434814, 0.5831698179244995, 0.6200222969055176, 0.6568747758865356, 0.6937272548675537, 0.7305797338485718, 0.7674322128295898, 0.8042846918106079, 0.8411372303962708, 0.8779897689819336, 0.9148422479629517, 0.9516947269439697, 0.9885472059249878, 1.0253996849060059, 1.062252163887024, 1.099104642868042 ] } }, "transformer.layers.10.4.ff.0.0.weight": { "min": -0.23436696827411652, "max": 0.24465103447437286, "mean": 0.00046349139302037656, "std": 0.04127480834722519, "abs_mean": 0.03279660642147064, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 84.52825164794922, "elements": 4194304, "histogram": { "counts": [ 2, 1, 5, 16, 20, 33, 69, 63, 104, 125, 136, 118, 96, 79, 57, 36, 26, 8, 3, 3 ], "bin_edges": [ -0.13340413570404053, -0.12061960995197296, -0.1078350841999054, -0.09505055844783783, -0.08226603269577026, -0.0694815069437027, -0.05669698119163513, -0.043912455439567566, -0.0311279296875, -0.018343403935432434, -0.005558878183364868, 0.007225647568702698, 0.020010173320770264, 0.03279469907283783, 0.045579224824905396, 0.05836375057697296, 0.07114827632904053, 0.0839328020811081, 0.09671732783317566, 0.10950185358524323, 0.12228637933731079 ] } }, "transformer.layers.10.4.ff.0.0.bias": { "min": -0.09793505817651749, "max": 0.0681939497590065, "mean": -0.03142588585615158, "std": 0.0180974081158638, "abs_mean": 0.03204537183046341, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.320847511291504, "elements": 4096, "histogram": { "counts": [ 3, 6, 9, 27, 71, 93, 155, 162, 161, 153, 92, 39, 21, 4, 0, 2, 0, 1, 0, 1 ], "bin_edges": [ -0.09469368308782578, -0.0865493044257164, -0.07840491831302643, -0.07026053965091705, -0.06211615726351738, -0.053971774876117706, -0.04582739621400833, -0.03768301382660866, -0.029538631439208984, -0.02139425277709961, -0.013249866664409637, -0.0051054880023002625, 0.0030388906598091125, 0.011183276772499084, 0.01932765543460846, 0.02747204154729843, 0.035616420209407806, 0.04376079887151718, 0.051905177533626556, 0.060049571096897125, 0.0681939497590065 ] } }, "transformer.layers.10.4.ff.2.weight": { "min": -0.3012528717517853, "max": 0.3511028289794922, "mean": -8.16234532976523e-05, "std": 0.04028059542179108, "abs_mean": 0.0316866971552372, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 82.4867172241211, "elements": 4194304, "histogram": { "counts": [ 1, 1, 10, 18, 38, 51, 71, 114, 117, 159, 124, 117, 80, 53, 23, 11, 8, 2, 1, 1 ], "bin_edges": [ -0.13198897242546082, -0.11771106719970703, -0.10343316197395325, -0.08915524929761887, -0.07487734407186508, -0.0605994388461113, -0.046321526169776917, -0.03204362094402313, -0.017765715718269348, -0.003487810492515564, 0.01079009473323822, 0.025067999958992004, 0.03934592008590698, 0.05362382531166077, 0.06790173053741455, 0.08217963576316833, 0.09645754098892212, 0.1107354462146759, 0.1250133514404297, 0.13929125666618347, 0.15356916189193726 ] } }, "transformer.layers.10.4.ff.2.bias": { "min": -0.15210135281085968, "max": 0.14944450557231903, "mean": 0.00025588623248040676, "std": 0.023021480068564415, "abs_mean": 0.017337616533041, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.7363730669021606, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 1, 0, 5, 13, 55, 130, 260, 280, 168, 68, 13, 3, 1, 0, 0, 0, 1 ], "bin_edges": [ -0.15210135281085968, -0.13702405989170074, -0.12194676697254181, -0.10686947405338287, -0.09179218113422394, -0.076714888215065, -0.06163759529590607, -0.04656030237674713, -0.031483009457588196, -0.01640571653842926, -0.0013284236192703247, 0.01374886929988861, 0.028826162219047546, 0.04390345513820648, 0.05898074805736542, 0.07405804097652435, 0.08913533389568329, 0.10421262681484222, 0.11928991973400116, 0.1343672126531601, 0.14944450557231903 ] } }, "transformer.layers.11.1.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "abs_mean": 1.0, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ 0.5, 0.550000011920929, 0.6000000238418579, 0.6499999761581421, 0.699999988079071, 0.75, 0.800000011920929, 0.8500000238418579, 0.8999999761581421, 0.9500000476837158, 1.0, 1.0499999523162842, 1.100000023841858, 1.1500000953674316, 1.2000000476837158, 1.25, 1.2999999523162842, 1.350000023841858, 1.4000000953674316, 1.4500000476837158, 1.5 ] } }, "transformer.layers.11.2.to_q.weight": { "min": -0.031249936670064926, "max": 0.031249839812517166, "mean": -1.9292721844976768e-05, "std": 0.01804409734904766, "abs_mean": 0.01562901958823204, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 18.476978302001953, "elements": 1048576, "histogram": { "counts": [ 51, 50, 42, 43, 50, 63, 49, 61, 45, 43, 45, 51, 50, 60, 38, 47, 53, 58, 50, 51 ], "bin_edges": [ -0.031167268753051758, -0.02804918773472309, -0.024931106716394424, -0.021813027560710907, -0.01869494467973709, -0.015576864592730999, -0.012458784505724907, -0.00934070348739624, -0.0062226224690675735, -0.003104541450738907, 1.3539567589759827e-05, 0.0031316205859184265, 0.006249699741601944, 0.00936778262257576, 0.012485861778259277, 0.015603944659233093, 0.01872202381491661, 0.021840102970600128, 0.024958185851573944, 0.02807626500725746, 0.031194347888231277 ] } }, "transformer.layers.11.2.to_q.bias": { "min": -0.031226642429828644, "max": 0.03100142627954483, "mean": -0.0010842883493751287, "std": 0.01795371063053608, "abs_mean": 0.015566134825348854, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5752854347229004, "elements": 1024, "histogram": { "counts": [ 54, 50, 71, 46, 41, 56, 52, 54, 38, 53, 48, 59, 49, 49, 47, 47, 53, 47, 43, 43 ], "bin_edges": [ -0.031226642429828644, -0.02811523899435997, -0.025003835558891296, -0.021892432123422623, -0.01878102868795395, -0.015669625252485275, -0.012558221817016602, -0.009446818381547928, -0.006335414946079254, -0.0032240115106105804, -0.00011260807514190674, 0.002998795360326767, 0.006110198795795441, 0.009221602231264114, 0.012333005666732788, 0.015444409102201462, 0.018555812537670135, 0.02166721597313881, 0.024778619408607483, 0.027890022844076157, 0.03100142627954483 ] } }, "transformer.layers.11.2.to_k.weight": { "min": -0.031249966472387314, "max": 0.031249895691871643, "mean": 3.5441100862954045e-06, "std": 0.018044503405690193, "abs_mean": 0.015626542270183563, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 18.477415084838867, "elements": 1048576, "histogram": { "counts": [ 47, 54, 39, 49, 49, 44, 41, 44, 43, 55, 57, 63, 52, 52, 39, 66, 51, 43, 61, 51 ], "bin_edges": [ -0.031233858317136765, -0.028110237792134285, -0.024986617267131805, -0.021862998604774475, -0.018739378079771996, -0.015615757554769516, -0.012492138892412186, -0.009368518367409706, -0.0062448978424072266, -0.003121277317404747, 2.343207597732544e-06, 0.003125961869955063, 0.006249580532312393, 0.009373202919960022, 0.012496821582317352, 0.015620443969964981, 0.01874406263232231, 0.02186768129467964, 0.02499130368232727, 0.0281149223446846, 0.03123854473233223 ] } }, "transformer.layers.11.2.to_k.bias": { "min": -0.031156372278928757, "max": 0.031184475868940353, "mean": 0.0003338930255267769, "std": 0.018065759912133217, "abs_mean": 0.01575113646686077, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.57792067527771, "elements": 1024, "histogram": { "counts": [ 48, 56, 52, 40, 48, 42, 63, 44, 55, 45, 40, 49, 56, 49, 55, 52, 48, 60, 55, 43 ], "bin_edges": [ -0.031156372278928757, -0.02803933061659336, -0.024922287091612816, -0.02180524542927742, -0.018688201904296875, -0.01557116024196148, -0.012454118579626083, -0.009337075054645538, -0.0062200333923101425, -0.0031029917299747467, 1.405179500579834e-05, 0.0031310953199863434, 0.00624813511967659, 0.009365178644657135, 0.01248222216963768, 0.015599261969327927, 0.01871630549430847, 0.021833349019289017, 0.024950388818979263, 0.02806743234395981, 0.031184475868940353 ] } }, "transformer.layers.11.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ], "norm": 0.0, "elements": 1048576, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.11.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.11.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ], "norm": 0.0, "elements": 1048576, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.11.2.to_out.0.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.11.3.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "abs_mean": 1.0, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ 0.5, 0.550000011920929, 0.6000000238418579, 0.6499999761581421, 0.699999988079071, 0.75, 0.800000011920929, 0.8500000238418579, 0.8999999761581421, 0.9500000476837158, 1.0, 1.0499999523162842, 1.100000023841858, 1.1500000953674316, 1.2000000476837158, 1.25, 1.2999999523162842, 1.350000023841858, 1.4000000953674316, 1.4500000476837158, 1.5 ] } }, "transformer.layers.11.4.ff.0.0.weight": { "min": -0.031249985098838806, "max": 0.031249992549419403, "mean": -8.39352924231207e-06, "std": 0.018043218180537224, "abs_mean": 0.015625, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 36.949981689453125, "elements": 4194304, "histogram": { "counts": [ 61, 48, 38, 52, 58, 61, 40, 44, 59, 56, 56, 40, 43, 41, 48, 53, 49, 47, 44, 62 ], "bin_edges": [ -0.03120383620262146, -0.02808719128370285, -0.02497054636478424, -0.021853899583220482, -0.018737254664301872, -0.015620609745383263, -0.012503962963819504, -0.009387318044900894, -0.0062706731259822845, -0.003154028207063675, -3.738328814506531e-05, 0.0030792616307735443, 0.006195910274982452, 0.009312555193901062, 0.012429200112819672, 0.015545845031738281, 0.01866248995065689, 0.0217791348695755, 0.02489577978849411, 0.02801242470741272, 0.031129073351621628 ] } }, "transformer.layers.11.4.ff.0.0.bias": { "min": -0.03124961629509926, "max": 0.031239181756973267, "mean": 0.00015365774743258953, "std": 0.017994258552789688, "abs_mean": 0.015541428700089455, "sparsity": 0.0, "shape": [ 4096 ], "norm": 1.151534080505371, "elements": 4096, "histogram": { "counts": [ 41, 51, 42, 64, 49, 41, 50, 56, 39, 57, 55, 44, 48, 51, 54, 54, 54, 50, 55, 45 ], "bin_edges": [ -0.03124961629509926, -0.02812858298420906, -0.025007549673318863, -0.021886516362428665, -0.018765483051538467, -0.01564444974064827, -0.012523418292403221, -0.009402384981513023, -0.006281351670622826, -0.003160318359732628, -3.9285048842430115e-05, 0.0030817463994026184, 0.006202779710292816, 0.009323813021183014, 0.012444846332073212, 0.01556587964296341, 0.018686912953853607, 0.021807946264743805, 0.024928979575634003, 0.0280500128865242, 0.031171046197414398 ] } }, "transformer.layers.11.4.ff.2.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 4096 ], "norm": 0.0, "elements": 4194304, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.11.4.ff.2.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.12.1.g": { "min": 0.3829966187477112, "max": 0.718121349811554, "mean": 0.5806018114089966, "std": 0.03862323611974716, "abs_mean": 0.5806018114089966, "sparsity": 0.0, "shape": [ 1024 ], "norm": 18.620281219482422, "elements": 1024, "histogram": { "counts": [ 2, 3, 3, 1, 0, 8, 14, 19, 44, 100, 116, 212, 192, 153, 71, 38, 16, 3, 3, 2 ], "bin_edges": [ 0.3829966187477112, 0.3997528553009033, 0.41650909185409546, 0.4332653284072876, 0.45002156496047974, 0.4667778015136719, 0.483534038066864, 0.5002902746200562, 0.5170465111732483, 0.5338027477264404, 0.5505589842796326, 0.5673152208328247, 0.5840714573860168, 0.600827693939209, 0.6175839304924011, 0.6343401670455933, 0.6510964035987854, 0.6678526401519775, 0.6846088767051697, 0.7013651132583618, 0.718121349811554 ] } }, "transformer.layers.12.2.to_q.weight": { "min": -0.23782959580421448, "max": 0.1963561624288559, "mean": 2.6626767066773027e-05, "std": 0.03746971860527992, "abs_mean": 0.02968023158609867, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.36855697631836, "elements": 1048576, "histogram": { "counts": [ 2, 4, 1, 11, 17, 27, 54, 93, 123, 132, 127, 122, 108, 74, 40, 28, 20, 11, 4, 2 ], "bin_edges": [ -0.12963607907295227, -0.11716864258050919, -0.1047012135386467, -0.09223377704620361, -0.07976634800434113, -0.06729891151189804, -0.054831475019454956, -0.04236404597759247, -0.029896609485149384, -0.0174291729927063, -0.004961743950843811, 0.007505685091018677, 0.01997312903404236, 0.032440558075904846, 0.044907987117767334, 0.057375431060791016, 0.0698428601026535, 0.08231028914451599, 0.09477773308753967, 0.10724516212940216, 0.11971258372068405 ] } }, "transformer.layers.12.2.to_q.bias": { "min": -0.11848776042461395, "max": 0.1658152937889099, "mean": 0.0009899433935061097, "std": 0.027532605454325676, "abs_mean": 0.019537298008799553, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8811826705932617, "elements": 1024, "histogram": { "counts": [ 3, 3, 4, 9, 13, 35, 110, 225, 267, 178, 94, 24, 13, 8, 8, 4, 0, 0, 1, 1 ], "bin_edges": [ -0.11848776042461395, -0.10427260398864746, -0.09005745500326157, -0.07584229856729507, -0.06162714585661888, -0.04741199314594269, -0.033196836709976196, -0.0189816877245903, -0.00476653128862381, 0.009448617696762085, 0.023663774132728577, 0.03787893056869507, 0.05209408700466156, 0.06630924344062805, 0.08052438497543335, 0.09473954141139984, 0.10895469784736633, 0.12316985428333282, 0.13738499581813812, 0.1516001671552658, 0.1658152937889099 ] } }, "transformer.layers.12.2.to_k.weight": { "min": -0.2458610236644745, "max": 0.5000857710838318, "mean": -5.0437982281437144e-05, "std": 0.037627607583999634, "abs_mean": 0.029811149463057518, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.53023910522461, "elements": 1048576, "histogram": { "counts": [ 2, 3, 8, 23, 39, 65, 119, 149, 173, 152, 119, 71, 36, 25, 9, 5, 1, 0, 0, 1 ], "bin_edges": [ -0.13122329115867615, -0.1159626767039299, -0.10070206224918365, -0.08544144034385681, -0.07018082588911057, -0.05492021143436432, -0.039659589529037476, -0.02439897507429123, -0.009138360619544983, 0.00612226128578186, 0.02138286828994751, 0.03664349019527435, 0.051904112100601196, 0.06716471910476685, 0.08242534101009369, 0.09768594801425934, 0.11294656991958618, 0.12820717692375183, 0.14346781373023987, 0.15872842073440552, 0.17398902773857117 ] } }, "transformer.layers.12.2.to_k.bias": { "min": -3.936108350753784, "max": 3.7635273933410645, "mean": -0.003571532666683197, "std": 0.6807447671890259, "abs_mean": 0.44434577226638794, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.77349281311035, "elements": 1024, "histogram": { "counts": [ 2, 1, 2, 2, 6, 6, 13, 37, 89, 257, 337, 162, 39, 28, 5, 4, 3, 4, 1, 2 ], "bin_edges": [ -3.936108350753784, -3.551126480102539, -3.166144847869873, -2.781162977218628, -2.396181106567383, -2.011199474334717, -1.6262176036834717, -1.2412359714508057, -0.8562541007995605, -0.47127223014831543, -0.08629059791564941, 0.2986910343170166, 0.6836731433868408, 1.0686547756195068, 1.4536364078521729, 1.838618516921997, 2.223600149154663, 2.608581781387329, 2.9935638904571533, 3.3785455226898193, 3.7635273933410645 ] } }, "transformer.layers.12.2.to_v.weight": { "min": -0.2272127866744995, "max": 0.25125452876091003, "mean": -1.1669091691146605e-05, "std": 0.03743912652134895, "abs_mean": 0.029495541006326675, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.337135314941406, "elements": 1048576, "histogram": { "counts": [ 2, 0, 1, 4, 9, 21, 38, 90, 131, 162, 167, 137, 92, 78, 42, 17, 5, 2, 0, 2 ], "bin_edges": [ -0.15501034259796143, -0.1397821605205536, -0.12455396354198456, -0.10932578146457672, -0.09409759193658829, -0.07886940240859985, -0.06364122033119202, -0.04841303080320358, -0.03318484127521515, -0.017956659197807312, -0.0027284622192382812, 0.012499719858169556, 0.027727901935577393, 0.04295609891414642, 0.05818428099155426, 0.07341247797012329, 0.08864066004753113, 0.10386884212493896, 0.1190970242023468, 0.13432523608207703, 0.14955341815948486 ] } }, "transformer.layers.12.2.to_v.bias": { "min": -0.07160257548093796, "max": 0.08056868612766266, "mean": -0.0005193912656977773, "std": 0.015654100105166435, "abs_mean": 0.012508584186434746, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5009623169898987, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 4, 12, 21, 85, 138, 182, 176, 155, 136, 61, 20, 4, 2, 0, 1, 0, 1 ], "bin_edges": [ -0.07160257548093796, -0.06399401277303696, -0.056385450065135956, -0.048776887357234955, -0.041168324649333954, -0.03355976194143295, -0.025951199233531952, -0.01834263652563095, -0.01073407381772995, -0.003125511109828949, 0.004483051598072052, 0.012091614305973053, 0.019700177013874054, 0.027308739721775055, 0.034917302429676056, 0.04252586513757706, 0.05013442784547806, 0.05774299055337906, 0.06535155326128006, 0.07296011596918106, 0.08056868612766266 ] } }, "transformer.layers.12.2.to_out.0.weight": { "min": -0.22808189690113068, "max": 0.25764524936676025, "mean": -2.8624439437408e-05, "std": 0.03542578965425491, "abs_mean": 0.02771892212331295, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 36.27553176879883, "elements": 1048576, "histogram": { "counts": [ 2, 1, 6, 20, 27, 61, 87, 108, 135, 148, 131, 96, 84, 44, 26, 17, 4, 1, 0, 2 ], "bin_edges": [ -0.11833298951387405, -0.10563816130161285, -0.09294333308935165, -0.08024850487709045, -0.06755367666482925, -0.054858848452568054, -0.042164020240306854, -0.029469192028045654, -0.016774363815784454, -0.004079535603523254, 0.008615292608737946, 0.02131011337041855, 0.034004949033260345, 0.04669978469610214, 0.059394605457782745, 0.07208942621946335, 0.08478426188230515, 0.09747909754514694, 0.11017391830682755, 0.12286873906850815, 0.13556356728076935 ] } }, "transformer.layers.12.2.to_out.0.bias": { "min": -0.2000962197780609, "max": 0.21490387618541718, "mean": -0.0055319443345069885, "std": 0.0682973712682724, "abs_mean": 0.05413249880075455, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.1916093826293945, "elements": 1024, "histogram": { "counts": [ 7, 9, 10, 35, 36, 53, 72, 108, 115, 134, 106, 105, 80, 45, 43, 19, 12, 5, 4, 2 ], "bin_edges": [ -0.2000962197780609, -0.1793462187051773, -0.1585962176322937, -0.1378462016582489, -0.1170962005853653, -0.09634619951248169, -0.07559619098901749, -0.054846182465553284, -0.03409618139266968, -0.013346180319786072, 0.007403820753097534, 0.028153836727142334, 0.04890383780002594, 0.06965383887290955, 0.09040385484695435, 0.11115384101867676, 0.13190385699272156, 0.15265387296676636, 0.17340385913848877, 0.19415387511253357, 0.21490387618541718 ] } }, "transformer.layers.12.3.g": { "min": 0.4052681028842926, "max": 1.1870543956756592, "mean": 0.7378469705581665, "std": 0.05485502630472183, "abs_mean": 0.7378469705581665, "sparsity": 0.0, "shape": [ 1024 ], "norm": 23.676198959350586, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 1, 7, 72, 268, 371, 160, 72, 22, 12, 5, 4, 1, 0, 0, 0, 1, 2 ], "bin_edges": [ 0.46438509225845337, 0.5005185604095459, 0.5366520285606384, 0.572785496711731, 0.6089189648628235, 0.645052433013916, 0.6811858415603638, 0.7173193693161011, 0.7534527778625488, 0.7895863056182861, 0.8257197141647339, 0.8618532419204712, 0.897986650466919, 0.9341201186180115, 0.970253586769104, 1.0063869953155518, 1.042520523071289, 1.0786540508270264, 1.1147874593734741, 1.1509208679199219, 1.1870543956756592 ] } }, "transformer.layers.12.4.ff.0.0.weight": { "min": -0.22090063989162445, "max": 0.24591459333896637, "mean": 0.0005211709067225456, "std": 0.041342560201883316, "abs_mean": 0.032862767577171326, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 84.66887664794922, "elements": 4194304, "histogram": { "counts": [ 4, 5, 11, 18, 33, 44, 83, 105, 127, 129, 141, 87, 93, 47, 35, 15, 14, 5, 2, 2 ], "bin_edges": [ -0.13292989134788513, -0.1190410628914833, -0.10515223443508148, -0.09126341342926025, -0.07737458497285843, -0.0634857565164566, -0.049596935510635376, -0.03570810705423355, -0.021819278597831726, -0.007930450141429901, 0.005958378314971924, 0.019847199320793152, 0.03373602032661438, 0.0476248562335968, 0.06151367723941803, 0.07540251314640045, 0.08929133415222168, 0.10318015515804291, 0.11706899106502533, 0.13095781207084656, 0.14484664797782898 ] } }, "transformer.layers.12.4.ff.0.0.bias": { "min": -0.10329551994800568, "max": 0.02418467588722706, "mean": -0.03265417367219925, "std": 0.0188569538295269, "abs_mean": 0.03297993913292885, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.413226842880249, "elements": 4096, "histogram": { "counts": [ 5, 8, 8, 18, 30, 39, 57, 78, 97, 121, 136, 115, 106, 75, 53, 30, 16, 6, 1, 1 ], "bin_edges": [ -0.0908934697508812, -0.08513956516981125, -0.0793856531381607, -0.07363174855709076, -0.06787784397602081, -0.06212393194437027, -0.056370027363300323, -0.05061611905694008, -0.044862210750579834, -0.03910830244421959, -0.033354394137859344, -0.027600489556789398, -0.021846584975719452, -0.01609267294406891, -0.010338768362998962, -0.004584856331348419, 0.001169048249721527, 0.006922952830791473, 0.012676864862442017, 0.018430769443511963, 0.02418467588722706 ] } }, "transformer.layers.12.4.ff.2.weight": { "min": -0.44879788160324097, "max": 0.421781986951828, "mean": -0.00043243536492809653, "std": 0.046903904527425766, "abs_mean": 0.03645244985818863, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 96.05355072021484, "elements": 4194304, "histogram": { "counts": [ 2, 1, 4, 3, 18, 27, 59, 105, 138, 184, 165, 131, 94, 33, 25, 5, 2, 2, 1, 1 ], "bin_edges": [ -0.19662070274353027, -0.17642301321029663, -0.1562253087759018, -0.13602760434150696, -0.11582991480827332, -0.09563221782445908, -0.07543452084064484, -0.05523681640625, -0.03503912687301636, -0.014841437339782715, 0.005356267094612122, 0.025553971529006958, 0.0457516610622406, 0.06594935059547424, 0.08614706993103027, 0.10634475946426392, 0.12654244899749756, 0.1467401385307312, 0.16693782806396484, 0.18713554739952087, 0.20733323693275452 ] } }, "transformer.layers.12.4.ff.2.bias": { "min": -0.25108596682548523, "max": 0.46939900517463684, "mean": 0.003194585908204317, "std": 0.04450792446732521, "abs_mean": 0.03129497915506363, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.4272236824035645, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 5, 25, 103, 371, 303, 149, 34, 3, 0, 0, 1, 1, 1, 0, 0, 0, 1 ], "bin_edges": [ -0.25108596682548523, -0.2150617241859436, -0.17903746664524078, -0.14301320910453796, -0.10698896646499634, -0.07096472382545471, -0.03494046628475189, 0.0010837912559509277, 0.037108033895492554, 0.07313227653503418, 0.1091565191745758, 0.14518079161643982, 0.18120503425598145, 0.21722927689552307, 0.2532535493373871, 0.2892777621746063, 0.32530203461647034, 0.36132630705833435, 0.3973505198955536, 0.4333747923374176, 0.46939900517463684 ] } }, "transformer.layers.13.0.weight": { "min": -0.3169757127761841, "max": 0.33316904306411743, "mean": -2.5288825781899504e-05, "std": 0.021290883421897888, "abs_mean": 0.016878249123692513, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 30.831497192382812, "elements": 2097152, "histogram": { "counts": [ 2, 2, 4, 14, 20, 34, 63, 82, 110, 136, 130, 112, 104, 75, 48, 26, 28, 6, 2, 2 ], "bin_edges": [ -0.07049302756786346, -0.06357433646917343, -0.0566556490957737, -0.049736957997083664, -0.04281827062368393, -0.035899579524993896, -0.028980888426303864, -0.02206220105290413, -0.015143509954214096, -0.008224818855524063, -0.0013061314821243286, 0.005612559616565704, 0.012531250715255737, 0.01944994181394577, 0.026368625462055206, 0.03328731656074524, 0.04020600765943527, 0.047124698758125305, 0.05404338985681534, 0.06096208095550537, 0.0678807720541954 ] } }, "transformer.layers.13.1.g": { "min": 0.3246179223060608, "max": 0.6840593218803406, "mean": 0.5709414482116699, "std": 0.04453985393047333, "abs_mean": 0.5709414482116699, "sparsity": 0.0, "shape": [ 1024 ], "norm": 18.325580596923828, "elements": 1024, "histogram": { "counts": [ 2, 1, 0, 1, 4, 5, 8, 14, 11, 25, 48, 74, 123, 163, 219, 184, 69, 30, 15, 4 ], "bin_edges": [ 0.3246179223060608, 0.34259000420570374, 0.3605620563030243, 0.37853413820266724, 0.3965061902999878, 0.41447827219963074, 0.4324503540992737, 0.45042240619659424, 0.4683944880962372, 0.4863665699958801, 0.5043386220932007, 0.522310733795166, 0.5402827858924866, 0.5582548379898071, 0.5762269496917725, 0.5941989421844482, 0.6121710538864136, 0.6301431655883789, 0.6481151580810547, 0.66608726978302, 0.6840593218803406 ] } }, "transformer.layers.13.2.to_q.weight": { "min": -0.16449199616909027, "max": 0.17385058104991913, "mean": -4.8540678108111024e-05, "std": 0.033184703439474106, "abs_mean": 0.026343977078795433, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.98078155517578, "elements": 1048576, "histogram": { "counts": [ 3, 3, 6, 19, 37, 64, 88, 137, 134, 154, 128, 96, 55, 28, 22, 14, 5, 5, 1, 1 ], "bin_edges": [ -0.10950843244791031, -0.09739306569099426, -0.08527769148349762, -0.07316232472658157, -0.06104695796966553, -0.04893159121274948, -0.03681621700525284, -0.024700850248336792, -0.012585483491420746, -0.0004701167345046997, 0.011645250022411346, 0.02376062422990799, 0.03587599843740463, 0.04799135774374008, 0.060106731951236725, 0.07222209125757217, 0.08433746546506882, 0.09645283967256546, 0.10856819897890091, 0.12068357318639755, 0.1327989399433136 ] } }, "transformer.layers.13.2.to_q.bias": { "min": -0.18657186627388, "max": 0.14269262552261353, "mean": 3.6818586522713304e-05, "std": 0.029670175164937973, "abs_mean": 0.02127697318792343, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.9489827156066895, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 5, 2, 7, 14, 39, 123, 216, 284, 179, 78, 23, 15, 7, 1, 2, 4 ], "bin_edges": [ -0.18657186627388, -0.1701086461544037, -0.15364542603492737, -0.13718219101428986, -0.12071897089481354, -0.10425575077533722, -0.0877925232052803, -0.07132929563522339, -0.05486607551574707, -0.03840285539627075, -0.021939635276794434, -0.005476400256156921, 0.010986819863319397, 0.027450039982795715, 0.04391327500343323, 0.060376495122909546, 0.07683971524238586, 0.09330293536186218, 0.1097661554813385, 0.12622937560081482, 0.14269262552261353 ] } }, "transformer.layers.13.2.to_k.weight": { "min": -0.3801823556423187, "max": 0.24568894505500793, "mean": -1.0017960448749363e-05, "std": 0.0327659472823143, "abs_mean": 0.026017045602202415, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.55192565917969, "elements": 1048576, "histogram": { "counts": [ 7, 17, 28, 45, 73, 96, 108, 116, 105, 105, 104, 65, 58, 28, 22, 13, 7, 1, 0, 2 ], "bin_edges": [ -0.0795261487364769, -0.06986591219902039, -0.06020567566156387, -0.05054543912410736, -0.04088520258665085, -0.031224966049194336, -0.021564729511737823, -0.011904492974281311, -0.0022442564368247986, 0.007415980100631714, 0.017076216638088226, 0.02673645317554474, 0.03639668971300125, 0.04605693370103836, 0.055717162787914276, 0.06537739187479019, 0.0750376358628273, 0.08469787985086441, 0.09435810893774033, 0.10401833802461624, 0.11367857456207275 ] } }, "transformer.layers.13.2.to_k.bias": { "min": -3.6502017974853516, "max": 3.2850754261016846, "mean": -0.014260413125157356, "std": 0.9845133423805237, "abs_mean": 0.7433228492736816, "sparsity": 0.0, "shape": [ 1024 ], "norm": 31.492347717285156, "elements": 1024, "histogram": { "counts": [ 2, 1, 6, 12, 14, 23, 46, 69, 101, 127, 189, 159, 89, 59, 44, 21, 24, 9, 3, 2 ], "bin_edges": [ -3.6502017974853516, -3.3034379482269287, -2.956674098968506, -2.609910249710083, -2.26314640045166, -1.9163825511932373, -1.5696187019348145, -1.2228548526763916, -0.8760910034179688, -0.5293271541595459, -0.18256330490112305, 0.1642005443572998, 0.5109643936157227, 0.8577280044555664, 1.2044920921325684, 1.5512561798095703, 1.898019790649414, 2.244783401489258, 2.5915474891662598, 2.9383115768432617, 3.2850754261016846 ] } }, "transformer.layers.13.2.to_v.weight": { "min": -0.2349099963903427, "max": 0.2473423033952713, "mean": -1.7784623196348548e-05, "std": 0.04170290008187294, "abs_mean": 0.03274451196193695, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 42.70320510864258, "elements": 1048576, "histogram": { "counts": [ 2, 5, 9, 26, 36, 91, 134, 145, 163, 132, 107, 75, 43, 18, 8, 2, 2, 1, 0, 1 ], "bin_edges": [ -0.13875167071819305, -0.12214882671833038, -0.10554599016904831, -0.08894315361976624, -0.07234030961990356, -0.055737465620040894, -0.03913462907075882, -0.022531792521476746, -0.005928948521614075, 0.010673895478248596, 0.027276739478111267, 0.043879568576812744, 0.060482412576675415, 0.07708525657653809, 0.09368808567523956, 0.11029092967510223, 0.1268937736749649, 0.14349661767482758, 0.16009946167469025, 0.17670230567455292, 0.1933051198720932 ] } }, "transformer.layers.13.2.to_v.bias": { "min": -0.07268015295267105, "max": 0.1542970985174179, "mean": 0.000663664482999593, "std": 0.02515619620680809, "abs_mean": 0.01977265253663063, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.8048853874206543, "elements": 1024, "histogram": { "counts": [ 7, 13, 32, 81, 110, 180, 191, 142, 117, 69, 36, 18, 2, 1, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.07268015295267105, -0.06133129075169563, -0.049982428550720215, -0.0386335626244545, -0.02728470042347908, -0.015935838222503662, -0.0045869722962379456, 0.006761886179447174, 0.01811075210571289, 0.029459618031978607, 0.04080847650766373, 0.05215734243392944, 0.06350620836019516, 0.07485506683588028, 0.0862039253115654, 0.09755279868841171, 0.10890165716409683, 0.12025051563978195, 0.13159939646720886, 0.1429482400417328, 0.1542970985174179 ] } }, "transformer.layers.13.2.to_out.0.weight": { "min": -0.2664458751678467, "max": 0.2483866959810257, "mean": -1.5342577171395533e-05, "std": 0.040143273770809174, "abs_mean": 0.03165973350405693, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.10619354248047, "elements": 1048576, "histogram": { "counts": [ 1, 0, 4, 6, 11, 19, 50, 74, 109, 151, 153, 150, 99, 78, 51, 29, 11, 1, 2, 1 ], "bin_edges": [ -0.1581651270389557, -0.14322325587272644, -0.1282813996076584, -0.11333952844142914, -0.09839766472578049, -0.08345580101013184, -0.06851392984390259, -0.05357206612825394, -0.038630202412605286, -0.023688331246376038, -0.008746474981307983, 0.006195396184921265, 0.021137267351150513, 0.03607912361621857, 0.051020994782447815, 0.06596285104751587, 0.08090472221374512, 0.09584659337997437, 0.11078846454620361, 0.12573030591011047, 0.14067217707633972 ] } }, "transformer.layers.13.2.to_out.0.bias": { "min": -0.18931904435157776, "max": 0.19443899393081665, "mean": -0.0012288358993828297, "std": 0.06666287034749985, "abs_mean": 0.054270241409540176, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.1325325965881348, "elements": 1024, "histogram": { "counts": [ 3, 7, 14, 25, 33, 55, 81, 100, 107, 101, 108, 88, 80, 67, 55, 42, 18, 11, 3, 2 ], "bin_edges": [ -0.18931904435157776, -0.1701311469078064, -0.15094324946403503, -0.13175533711910248, -0.11256743967533112, -0.09337954223155975, -0.0741916373372078, -0.055003732442855835, -0.03581583499908447, -0.01662793755531311, 0.002559959888458252, 0.021747872233390808, 0.04093576967716217, 0.06012366712093353, 0.07931157946586609, 0.09849947690963745, 0.11768737435340881, 0.13687527179718018, 0.15606316924095154, 0.1752510666847229, 0.19443899393081665 ] } }, "transformer.layers.13.3.g": { "min": 0.32919859886169434, "max": 0.997564435005188, "mean": 0.7190552949905396, "std": 0.051983967423439026, "abs_mean": 0.7190552949905396, "sparsity": 0.0, "shape": [ 1024 ], "norm": 23.069761276245117, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 1, 4, 3, 17, 71, 220, 309, 224, 91, 35, 15, 2, 4, 1, 2 ], "bin_edges": [ 0.32919859886169434, 0.3626168966293335, 0.39603519439697266, 0.42945346236228943, 0.4628717601299286, 0.49629005789756775, 0.5297083258628845, 0.5631266236305237, 0.5965449213981628, 0.629963219165802, 0.6633815169334412, 0.6967997550964355, 0.7302180528640747, 0.7636363506317139, 0.797054648399353, 0.8304729461669922, 0.8638912439346313, 0.8973095417022705, 0.9307278394699097, 0.9641461372375488, 0.997564435005188 ] } }, "transformer.layers.13.4.ff.0.0.weight": { "min": -0.2313733994960785, "max": 0.24550800025463104, "mean": 0.00018263014499098063, "std": 0.04090628772974014, "abs_mean": 0.03251039609313011, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.76873016357422, "elements": 4194304, "histogram": { "counts": [ 1, 0, 0, 1, 11, 15, 27, 49, 59, 95, 121, 137, 122, 118, 106, 67, 31, 23, 13, 4 ], "bin_edges": [ -0.16133558750152588, -0.1475898176431656, -0.1338440328836441, -0.12009826302528381, -0.10635248571634293, -0.09260670840740204, -0.07886093854904175, -0.06511516124010086, -0.05136938393115997, -0.037623606622219086, -0.023877829313278198, -0.010132059454917908, 0.003613710403442383, 0.017359495162963867, 0.031105265021324158, 0.04485104978084564, 0.05859681963920593, 0.07234258949756622, 0.08608837425708771, 0.099834144115448, 0.11357992142438889 ] } }, "transformer.layers.13.4.ff.0.0.bias": { "min": -0.11402574181556702, "max": 0.018650896847248077, "mean": -0.0424647182226181, "std": 0.0188254714012146, "abs_mean": 0.042548101395368576, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.972773313522339, "elements": 4096, "histogram": { "counts": [ 4, 1, 7, 10, 19, 36, 56, 76, 113, 141, 125, 148, 100, 78, 50, 19, 10, 3, 3, 1 ], "bin_edges": [ -0.10659940540790558, -0.10033688694238663, -0.09407437592744827, -0.08781185746192932, -0.08154934644699097, -0.07528682798147202, -0.06902430951595306, -0.06276179850101471, -0.05649928003549576, -0.050236765295267105, -0.04397425055503845, -0.0377117320895195, -0.03144921362400055, -0.025186702609062195, -0.018924184143543243, -0.012661673128604889, -0.0063991546630859375, -0.00013663619756698608, 0.006125874817371368, 0.01238839328289032, 0.018650896847248077 ] } }, "transformer.layers.13.4.ff.2.weight": { "min": -0.3894314467906952, "max": 0.4067791998386383, "mean": -2.1846279196324758e-05, "std": 0.048540692776441574, "abs_mean": 0.03770503029227257, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 99.40045928955078, "elements": 4194304, "histogram": { "counts": [ 1, 2, 1, 15, 20, 39, 79, 146, 184, 194, 126, 93, 57, 21, 14, 5, 2, 0, 0, 1 ], "bin_edges": [ -0.19731265306472778, -0.17545032501220703, -0.15358801186084747, -0.13172568380832672, -0.10986336320638657, -0.08800104260444641, -0.06613871455192566, -0.0442764014005661, -0.02241407334804535, -0.0005517452955245972, 0.02131056785583496, 0.04317289590835571, 0.06503522396087646, 0.08689755201339722, 0.10875985026359558, 0.13062217831611633, 0.15248450636863708, 0.17434683442115784, 0.1962091624736786, 0.21807146072387695, 0.2399337887763977 ] } }, "transformer.layers.13.4.ff.2.bias": { "min": -0.692162811756134, "max": 0.4120035469532013, "mean": 0.000852768833283335, "std": 0.060242246836423874, "abs_mean": 0.039657142013311386, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.9270035028457642, "elements": 1024, "histogram": { "counts": [ 2, 0, 0, 0, 0, 0, 0, 0, 1, 4, 34, 200, 463, 246, 39, 7, 1, 1, 1, 1 ], "bin_edges": [ -0.692162811756134, -0.6369544863700867, -0.5817461609840393, -0.5265378952026367, -0.47132954001426697, -0.4161212146282196, -0.36091291904449463, -0.30570459365844727, -0.2504962682723999, -0.19528794288635254, -0.14007961750030518, -0.08487129211425781, -0.029663026332855225, 0.02554529905319214, 0.0807536244392395, 0.13596194982528687, 0.19117027521133423, 0.2463786005973816, 0.30158692598342896, 0.3567952513694763, 0.4120035469532013 ] } }, "transformer.layers.14.0.weight": { "min": 0.0, "max": 1.0, "mean": 0.00048828125, "std": 0.0220916960388422, "abs_mean": 0.00048828125, "sparsity": 0.99951171875, "shape": [ 1024, 2048 ], "norm": 32.0, "elements": 2097152, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.1.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "abs_mean": 1.0, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ 0.5, 0.550000011920929, 0.6000000238418579, 0.6499999761581421, 0.699999988079071, 0.75, 0.800000011920929, 0.8500000238418579, 0.8999999761581421, 0.9500000476837158, 1.0, 1.0499999523162842, 1.100000023841858, 1.1500000953674316, 1.2000000476837158, 1.25, 1.2999999523162842, 1.350000023841858, 1.4000000953674316, 1.4500000476837158, 1.5 ] } }, "transformer.layers.14.2.to_q.weight": { "min": -0.031249970197677612, "max": 0.031249817460775375, "mean": -2.1022657165303826e-05, "std": 0.018035436049103737, "abs_mean": 0.015622841194272041, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 18.46811866760254, "elements": 1048576, "histogram": { "counts": [ 66, 57, 51, 56, 44, 39, 44, 44, 63, 52, 42, 43, 49, 41, 49, 40, 48, 58, 58, 56 ], "bin_edges": [ -0.03121861070394516, -0.028114574030041695, -0.02501053735613823, -0.021906500682234764, -0.0188024640083313, -0.015698427334427834, -0.012594390660524368, -0.009490353986620903, -0.006386317312717438, -0.0032822806388139725, -0.0001782439649105072, 0.002925790846347809, 0.006029829382896423, 0.009133867919445038, 0.012237902730703354, 0.01534193754196167, 0.018445976078510284, 0.0215500146150589, 0.024654049426317215, 0.02775808423757553, 0.030862122774124146 ] } }, "transformer.layers.14.2.to_q.bias": { "min": -0.03122086077928543, "max": 0.031233571469783783, "mean": -0.0006771883927285671, "std": 0.01782997138798237, "abs_mean": 0.015417349524796009, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5706920027732849, "elements": 1024, "histogram": { "counts": [ 52, 41, 59, 51, 59, 57, 46, 54, 44, 58, 42, 60, 52, 45, 51, 45, 47, 45, 46, 46 ], "bin_edges": [ -0.03122086077928543, -0.02809813991189003, -0.02497541718184948, -0.02185269631445408, -0.01872997358441353, -0.015607252717018127, -0.012484531849622726, -0.009361809119582176, -0.006239088252186775, -0.003116367384791374, 6.355345249176025e-06, 0.003129076212644577, 0.006251797080039978, 0.009374517947435379, 0.012497242540121078, 0.01561996340751648, 0.01874268427491188, 0.02186540514230728, 0.024988126009702682, 0.028110850602388382, 0.031233571469783783 ] } }, "transformer.layers.14.2.to_k.weight": { "min": -0.03124987706542015, "max": 0.031249921768903732, "mean": -8.839062502374873e-06, "std": 0.01803446188569069, "abs_mean": 0.015615805983543396, "sparsity": 9.5367431640625e-07, "shape": [ 1024, 1024 ], "norm": 18.467140197753906, "elements": 1048576, "histogram": { "counts": [ 56, 50, 49, 51, 53, 57, 44, 69, 39, 45, 39, 46, 61, 42, 49, 49, 49, 56, 39, 57 ], "bin_edges": [ -0.03123004361987114, -0.02810853160917759, -0.02498701959848404, -0.02186550945043564, -0.01874399743974209, -0.015622485429048538, -0.012500975281000137, -0.009379463270306587, -0.006257951259613037, -0.003136439248919487, -1.492723822593689e-05, 0.003106582909822464, 0.006228093057870865, 0.009349606931209564, 0.012471117079257965, 0.015592630952596664, 0.018714141100645065, 0.021835651248693466, 0.024957165122032166, 0.028078675270080566, 0.031200189143419266 ] } }, "transformer.layers.14.2.to_k.bias": { "min": -0.031232360750436783, "max": 0.031245984137058258, "mean": -0.0007298353011719882, "std": 0.017944591119885445, "abs_mean": 0.015577686950564384, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5744214653968811, "elements": 1024, "histogram": { "counts": [ 50, 52, 50, 43, 54, 69, 62, 40, 54, 49, 56, 38, 54, 50, 47, 43, 42, 54, 40, 53 ], "bin_edges": [ -0.031232360750436783, -0.028108444064855576, -0.02498452737927437, -0.021860608831048012, -0.018736692145466805, -0.015612775459885597, -0.01248885691165924, -0.009364940226078033, -0.006241023540496826, -0.003117106854915619, 6.809830665588379e-06, 0.0031307265162467957, 0.006254646927118301, 0.009378563612699509, 0.012502480298280716, 0.015626396983861923, 0.01875031366944313, 0.021874230355024338, 0.024998147040605545, 0.028122063726186752, 0.031245984137058258 ] } }, "transformer.layers.14.2.to_v.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ], "norm": 0.0, "elements": 1048576, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.2.to_v.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.2.to_out.0.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 1024 ], "norm": 0.0, "elements": 1048576, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.2.to_out.0.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.3.g": { "min": 1.0, "max": 1.0, "mean": 1.0, "std": 0.0, "abs_mean": 1.0, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ 0.5, 0.550000011920929, 0.6000000238418579, 0.6499999761581421, 0.699999988079071, 0.75, 0.800000011920929, 0.8500000238418579, 0.8999999761581421, 0.9500000476837158, 1.0, 1.0499999523162842, 1.100000023841858, 1.1500000953674316, 1.2000000476837158, 1.25, 1.2999999523162842, 1.350000023841858, 1.4000000953674316, 1.4500000476837158, 1.5 ] } }, "transformer.layers.14.4.ff.0.0.weight": { "min": -0.03125, "max": 0.031249988824129105, "mean": 3.591749646147946e-06, "std": 0.018040824681520462, "abs_mean": 0.015623635612428188, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 36.945003509521484, "elements": 4194304, "histogram": { "counts": [ 44, 44, 36, 44, 48, 49, 52, 50, 49, 51, 55, 40, 56, 52, 54, 46, 67, 56, 45, 62 ], "bin_edges": [ -0.03122270107269287, -0.028100185096263885, -0.0249776691198349, -0.021855153143405914, -0.01873263716697693, -0.015610120259225368, -0.012487603351473808, -0.009365087375044823, -0.006242571398615837, -0.0031200554221868515, 2.4605542421340942e-06, 0.003124978393316269, 0.0062474943697452545, 0.00937001034617424, 0.012492526322603226, 0.015615042299032211, 0.018737558275461197, 0.021860074251890182, 0.024982590228319168, 0.028105106204748154, 0.03122762218117714 ] } }, "transformer.layers.14.4.ff.0.0.bias": { "min": -0.031234480440616608, "max": 0.031246982514858246, "mean": 0.0001957040512934327, "std": 0.018076537176966667, "abs_mean": 0.015660608187317848, "sparsity": 0.0, "shape": [ 4096 ], "norm": 1.1568249464035034, "elements": 4096, "histogram": { "counts": [ 46, 33, 53, 46, 59, 36, 56, 53, 52, 38, 53, 51, 55, 49, 48, 43, 44, 64, 55, 66 ], "bin_edges": [ -0.031234480440616608, -0.028114622458815575, -0.02499476447701454, -0.021874908357858658, -0.018755050376057625, -0.015635192394256592, -0.012515336275100708, -0.009395478293299675, -0.006275620311498642, -0.003155762329697609, -3.590434789657593e-05, 0.003083951771259308, 0.006203807890415192, 0.009323667734861374, 0.012443523854017258, 0.01556338369846344, 0.018683239817619324, 0.021803095936775208, 0.02492295578122139, 0.028042811900377274, 0.031162668019533157 ] } }, "transformer.layers.14.4.ff.2.weight": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024, 4096 ], "norm": 0.0, "elements": 4194304, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.14.4.ff.2.bias": { "min": 0.0, "max": 0.0, "mean": 0.0, "std": 0.0, "abs_mean": 0.0, "sparsity": 1.0, "shape": [ 1024 ], "norm": 0.0, "elements": 1024, "histogram": { "counts": [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1000, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], "bin_edges": [ -0.5, -0.44999998807907104, -0.4000000059604645, -0.3499999940395355, -0.30000001192092896, -0.25, -0.19999998807907104, -0.15000000596046448, -0.09999999403953552, -0.04999998211860657, 0.0, 0.050000011920928955, 0.10000002384185791, 0.15000003576278687, 0.19999998807907104, 0.25, 0.30000001192092896, 0.3500000238418579, 0.40000003576278687, 0.44999998807907104, 0.5 ] } }, "transformer.layers.15.0.weight": { "min": -0.23450319468975067, "max": 0.2724616229534149, "mean": 6.948144346097251e-06, "std": 0.01881224475800991, "abs_mean": 0.014990497380495071, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 27.24210548400879, "elements": 2097152, "histogram": { "counts": [ 2, 2, 7, 10, 26, 53, 66, 75, 121, 125, 127, 119, 78, 77, 46, 34, 19, 8, 4, 1 ], "bin_edges": [ -0.06102960929274559, -0.05501559376716614, -0.049001578241586685, -0.04298756271600723, -0.03697354719042778, -0.030959531664848328, -0.024945516139268875, -0.018931500613689423, -0.01291748508810997, -0.006903469562530518, -0.0008894540369510651, 0.005124557763338089, 0.01113857701420784, 0.01715259626507759, 0.023166608065366745, 0.0291806198656559, 0.03519463911652565, 0.0412086583673954, 0.047222670167684555, 0.05323668196797371, 0.05925070121884346 ] } }, "transformer.layers.15.1.g": { "min": 0.32128843665122986, "max": 0.6922435760498047, "mean": 0.5815606117248535, "std": 0.045744746923446655, "abs_mean": 0.5815606117248535, "sparsity": 0.0, "shape": [ 1024 ], "norm": 18.6673641204834, "elements": 1024, "histogram": { "counts": [ 1, 2, 0, 2, 2, 5, 10, 8, 10, 20, 37, 53, 112, 177, 186, 214, 95, 39, 20, 7 ], "bin_edges": [ 0.32128843665122986, 0.3398361802101135, 0.3583839535713196, 0.37693169713020325, 0.3954794704914093, 0.41402721405029297, 0.432574987411499, 0.4511227309703827, 0.46967047452926636, 0.4882182478904724, 0.5067660212516785, 0.5253137350082397, 0.5438615083694458, 0.5624092817306519, 0.5809570550918579, 0.599504828453064, 0.6180525422096252, 0.6366002559661865, 0.6551480293273926, 0.6736958026885986, 0.6922435760498047 ] } }, "transformer.layers.15.2.to_q.weight": { "min": -0.18168264627456665, "max": 0.1974717229604721, "mean": -1.171275016531581e-05, "std": 0.03318728506565094, "abs_mean": 0.026278000324964523, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.98333740234375, "elements": 1048576, "histogram": { "counts": [ 2, 2, 7, 14, 23, 32, 46, 77, 125, 120, 132, 122, 97, 84, 45, 39, 19, 6, 6, 2 ], "bin_edges": [ -0.10859622806310654, -0.0980859100818634, -0.08757558465003967, -0.07706526666879654, -0.0665549486875534, -0.05604463070631027, -0.04553430527448654, -0.03502398729324341, -0.024513669312000275, -0.014003351330757141, -0.0034930333495140076, 0.007017292082309723, 0.017527617514133453, 0.02803792804479599, 0.03854825347661972, 0.04905856400728226, 0.05956888943910599, 0.07007921487092972, 0.08058952540159225, 0.09109985083341599, 0.10161017626523972 ] } }, "transformer.layers.15.2.to_q.bias": { "min": -0.16043128073215485, "max": 0.1292782723903656, "mean": -0.0010662535205483437, "std": 0.034117527306079865, "abs_mean": 0.02523173578083515, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.0917609930038452, "elements": 1024, "histogram": { "counts": [ 2, 1, 4, 5, 5, 3, 23, 38, 79, 131, 195, 200, 147, 91, 36, 17, 13, 5, 3, 2 ], "bin_edges": [ -0.16043128073215485, -0.14594580233097076, -0.13146032392978668, -0.1169748455286026, -0.10248936712741852, -0.08800388872623444, -0.07351841032505035, -0.05903293192386627, -0.04454745352268219, -0.030061975121498108, -0.015576496720314026, -0.0010910183191299438, 0.013394460082054138, 0.02787993848323822, 0.0423654168844223, 0.056850895285606384, 0.07133637368679047, 0.08582185208797455, 0.10030733048915863, 0.11479280889034271, 0.1292782723903656 ] } }, "transformer.layers.15.2.to_k.weight": { "min": -0.3318951725959778, "max": 0.31116846203804016, "mean": -1.0326401024940424e-05, "std": 0.03223801404237747, "abs_mean": 0.025547126308083534, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 33.01129913330078, "elements": 1048576, "histogram": { "counts": [ 3, 7, 9, 25, 46, 66, 86, 90, 118, 132, 116, 82, 72, 62, 38, 25, 11, 3, 6, 3 ], "bin_edges": [ -0.09133105725049973, -0.08163873851299286, -0.07194641977548599, -0.06225409731268883, -0.05256177857518196, -0.042869459837675095, -0.03317713737487793, -0.023484818637371063, -0.013792499899864197, -0.00410018116235733, 0.005592137575149536, 0.015284456312656403, 0.024976782500743866, 0.034669093787670135, 0.0443614199757576, 0.05405373126268387, 0.06374605745077133, 0.0734383836388588, 0.08313069492578506, 0.09282302111387253, 0.1025153324007988 ] } }, "transformer.layers.15.2.to_k.bias": { "min": -7.791203022003174, "max": 8.74953842163086, "mean": 0.09337067604064941, "std": 1.61784029006958, "abs_mean": 0.9389018416404724, "sparsity": 0.0, "shape": [ 1024 ], "norm": 51.8317985534668, "elements": 1024, "histogram": { "counts": [ 1, 6, 4, 6, 3, 3, 22, 55, 205, 417, 170, 54, 21, 6, 4, 7, 4, 5, 1, 6 ], "bin_edges": [ -7.791203022003174, -6.964166164398193, -6.137128829956055, -5.310091972351074, -4.483055114746094, -3.656017780303955, -2.8289809226989746, -2.001943588256836, -1.1749067306518555, -0.347869873046875, 0.47916746139526367, 1.3062043190002441, 2.1332411766052246, 2.960278034210205, 3.787315845489502, 4.614352703094482, 5.441389560699463, 6.268426418304443, 7.095463275909424, 7.922501087188721, 8.74953842163086 ] } }, "transformer.layers.15.2.to_v.weight": { "min": -0.23363685607910156, "max": 0.24183623492717743, "mean": 4.133234324399382e-05, "std": 0.0408620610833168, "abs_mean": 0.03202153369784355, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.84220504760742, "elements": 1048576, "histogram": { "counts": [ 2, 4, 2, 5, 11, 20, 30, 65, 87, 106, 137, 143, 126, 93, 67, 46, 23, 18, 10, 5 ], "bin_edges": [ -0.14702486991882324, -0.13389593362808228, -0.12076699733734131, -0.10763806104660034, -0.09450912475585938, -0.08138018846511841, -0.06825125217437744, -0.055122315883636475, -0.04199337959289551, -0.02886444330215454, -0.015735507011413574, -0.0026065707206726074, 0.01052236557006836, 0.023651301860809326, 0.03678023815155029, 0.04990917444229126, 0.06303811073303223, 0.0761670470237732, 0.08929598331451416, 0.10242491960525513, 0.11555387079715729 ] } }, "transformer.layers.15.2.to_v.bias": { "min": -0.07588791847229004, "max": 0.0656837597489357, "mean": 0.00047856790479272604, "std": 0.01940334029495716, "abs_mean": 0.015272315591573715, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.6207925081253052, "elements": 1024, "histogram": { "counts": [ 4, 0, 2, 4, 7, 23, 36, 61, 117, 128, 143, 143, 128, 96, 56, 22, 14, 11, 4, 1 ], "bin_edges": [ -0.07588791847229004, -0.06880933791399002, -0.061730749905109406, -0.05465216934680939, -0.04757358506321907, -0.040495000779628754, -0.033416420221328735, -0.02633783593773842, -0.019259251654148102, -0.012180671095848083, -0.005102083086967468, 0.00197649747133255, 0.009055078029632568, 0.016133666038513184, 0.023212246596813202, 0.030290834605693817, 0.037369415163993835, 0.044447995722293854, 0.05152657628059387, 0.05860516428947449, 0.0656837597489357 ] } }, "transformer.layers.15.2.to_out.0.weight": { "min": -0.2455652505159378, "max": 0.2337566763162613, "mean": -2.8880367608508095e-06, "std": 0.03943672403693199, "abs_mean": 0.031019341200590134, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.38263702392578, "elements": 1048576, "histogram": { "counts": [ 1, 0, 2, 5, 9, 23, 52, 99, 158, 183, 167, 134, 87, 44, 22, 7, 4, 2, 0, 1 ], "bin_edges": [ -0.17114542424678802, -0.15392161905765533, -0.13669782876968384, -0.11947402358055115, -0.10225021839141846, -0.08502641320228577, -0.06780261546373367, -0.05057881772518158, -0.03335501253604889, -0.0161312073469162, 0.0010925978422164917, 0.01831638813018799, 0.03554019331932068, 0.05276399850845337, 0.06998778879642487, 0.08721159398555756, 0.10443539917469025, 0.12165920436382294, 0.13888300955295563, 0.15610681474208832, 0.1733306348323822 ] } }, "transformer.layers.15.2.to_out.0.bias": { "min": -0.16261433064937592, "max": 0.1605682373046875, "mean": 0.0016338212881237268, "std": 0.06525633484125137, "abs_mean": 0.054562319070100784, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.0878376960754395, "elements": 1024, "histogram": { "counts": [ 8, 11, 23, 35, 34, 49, 76, 81, 85, 70, 68, 99, 91, 82, 63, 52, 39, 19, 10, 5 ], "bin_edges": [ -0.16261433064937592, -0.14645519852638245, -0.13029608130455017, -0.1141369491815567, -0.09797781705856323, -0.08181868493556976, -0.06565956026315689, -0.04950043559074402, -0.03334130346775055, -0.01718217134475708, -0.0010230392217636108, 0.015136078000068665, 0.031295210123062134, 0.0474543422460556, 0.06361345946788788, 0.07977259159088135, 0.09593172371387482, 0.11209084093570709, 0.12824998795986176, 0.14440910518169403, 0.1605682373046875 ] } }, "transformer.layers.15.3.g": { "min": 0.5568146705627441, "max": 0.9421050548553467, "mean": 0.7127699851989746, "std": 0.03979077190160751, "abs_mean": 0.7127699851989746, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.8441219329834, "elements": 1024, "histogram": { "counts": [ 3, 1, 1, 12, 29, 85, 155, 213, 205, 153, 70, 38, 15, 10, 3, 2, 2, 2, 0, 1 ], "bin_edges": [ 0.5568146705627441, 0.5760791897773743, 0.5953437089920044, 0.6146082282066345, 0.6338727474212646, 0.6531372666358948, 0.6724017858505249, 0.691666305065155, 0.7109308242797852, 0.7301953434944153, 0.7494598627090454, 0.7687243819236755, 0.7879889011383057, 0.8072534203529358, 0.8265179395675659, 0.845782458782196, 0.8650469779968262, 0.8843114972114563, 0.9035760164260864, 0.9228405356407166, 0.9421050548553467 ] } }, "transformer.layers.15.4.ff.0.0.weight": { "min": -0.22831875085830688, "max": 0.2548784911632538, "mean": -4.536488631856628e-05, "std": 0.040581412613391876, "abs_mean": 0.03228280693292618, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.1028060913086, "elements": 4194304, "histogram": { "counts": [ 1, 1, 3, 12, 23, 39, 67, 94, 133, 127, 128, 104, 88, 78, 44, 26, 17, 10, 3, 2 ], "bin_edges": [ -0.1346137970685959, -0.12144477665424347, -0.10827574878931046, -0.09510672837495804, -0.08193770051002502, -0.06876868009567261, -0.05559965968132019, -0.042430631816387177, -0.02926161140203476, -0.016092590987682343, -0.0029235631227493286, 0.010245457291603088, 0.023414477705955505, 0.03658349812030792, 0.04975253343582153, 0.06292155385017395, 0.07609057426452637, 0.08925959467887878, 0.1024286150932312, 0.11559765040874481, 0.12876667082309723 ] } }, "transformer.layers.15.4.ff.0.0.bias": { "min": -0.13459284603595734, "max": 0.02228192612528801, "mean": -0.04134010896086693, "std": 0.018355557695031166, "abs_mean": 0.04144716635346413, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.8947877883911133, "elements": 4096, "histogram": { "counts": [ 4, 2, 7, 22, 26, 36, 74, 95, 136, 125, 122, 115, 98, 61, 46, 15, 8, 5, 2, 1 ], "bin_edges": [ -0.10183046758174896, -0.09562484920024872, -0.08941923081874847, -0.08321361243724823, -0.07700798660516739, -0.07080236822366714, -0.0645967498421669, -0.05839112773537636, -0.052185509353876114, -0.04597989097237587, -0.03977426886558533, -0.03356865048408508, -0.02736303210258484, -0.021157413721084595, -0.014951787889003754, -0.00874616950750351, -0.0025405511260032654, 0.0036650672554969788, 0.009870685636997223, 0.016076311469078064, 0.02228192612528801 ] } }, "transformer.layers.15.4.ff.2.weight": { "min": -0.4211236536502838, "max": 0.3922184407711029, "mean": -4.3558138713706285e-06, "std": 0.04779110848903656, "abs_mean": 0.03745824098587036, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 97.86603546142578, "elements": 4194304, "histogram": { "counts": [ 3, 1, 3, 13, 21, 47, 78, 120, 132, 160, 127, 128, 81, 44, 28, 9, 2, 2, 0, 1 ], "bin_edges": [ -0.1714564561843872, -0.15337276458740234, -0.13528907299041748, -0.11720538139343262, -0.09912168979644775, -0.08103799819946289, -0.06295430660247803, -0.044870615005493164, -0.0267869234085083, -0.008703231811523438, 0.009380459785461426, 0.02746415138244629, 0.04554784297943115, 0.06363153457641602, 0.08171522617340088, 0.09979891777038574, 0.1178826093673706, 0.13596630096435547, 0.15404999256134033, 0.1721336841583252, 0.19021736085414886 ] } }, "transformer.layers.15.4.ff.2.bias": { "min": -0.6065256595611572, "max": 0.6503778696060181, "mean": 0.0015810506884008646, "std": 0.05679204687476158, "abs_mean": 0.039021797478199005, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.8171623945236206, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 2, 10, 177, 487, 280, 38, 1, 3, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.6065256595611572, -0.5436804890632629, -0.48083531856536865, -0.417990118265152, -0.3551449477672577, -0.2922997772693634, -0.22945457696914673, -0.16660940647125244, -0.10376423597335815, -0.04091906547546387, 0.02192610502243042, 0.08477127552032471, 0.14761650562286377, 0.21046167612075806, 0.27330684661865234, 0.33615201711654663, 0.3989971876144409, 0.46184241771698, 0.5246875286102295, 0.5875327587127686, 0.6503778696060181 ] } }, "transformer.layers.16.0.weight": { "min": -0.2516687214374542, "max": 0.3206498920917511, "mean": -6.057634891476482e-06, "std": 0.0196156594902277, "abs_mean": 0.015561186708509922, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 28.40553855895996, "elements": 2097152, "histogram": { "counts": [ 1, 0, 2, 3, 6, 19, 34, 76, 107, 136, 142, 149, 134, 91, 52, 28, 10, 6, 3, 1 ], "bin_edges": [ -0.08025113493204117, -0.07286576926708221, -0.06548041105270386, -0.058095045387744904, -0.05070968344807625, -0.04332432150840759, -0.03593895584344864, -0.028553593903779984, -0.021168231964111328, -0.013782866299152374, -0.006397508084774017, 0.0009878575801849365, 0.00837322324514389, 0.015758581459522247, 0.0231439471244812, 0.030529305338859558, 0.03791467100381851, 0.04530002921819687, 0.05268540233373642, 0.06007076054811478, 0.06745612621307373 ] } }, "transformer.layers.16.1.g": { "min": 0.35995498299598694, "max": 0.6810278296470642, "mean": 0.5706292986869812, "std": 0.042767371982336044, "abs_mean": 0.5706292986869812, "sparsity": 0.0, "shape": [ 1024 ], "norm": 18.31130027770996, "elements": 1024, "histogram": { "counts": [ 2, 1, 2, 2, 7, 5, 14, 18, 21, 33, 56, 113, 146, 159, 175, 162, 56, 17, 9, 2 ], "bin_edges": [ 0.35995498299598694, 0.37600862979888916, 0.3920622766017914, 0.4081159234046936, 0.42416954040527344, 0.44022321701049805, 0.4562768340110779, 0.4723304808139801, 0.4883841276168823, 0.5044378042221069, 0.5204914212226868, 0.5365450382232666, 0.5525987148284912, 0.568652331829071, 0.5847059488296509, 0.6007596254348755, 0.6168133020401001, 0.6328669190406799, 0.6489205360412598, 0.6649742126464844, 0.6810278296470642 ] } }, "transformer.layers.16.2.to_q.weight": { "min": -0.22037938237190247, "max": 0.1769036501646042, "mean": -3.467117130639963e-05, "std": 0.03430242836475372, "abs_mean": 0.027101609855890274, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.12527847290039, "elements": 1048576, "histogram": { "counts": [ 2, 6, 16, 16, 32, 40, 61, 99, 140, 142, 108, 116, 83, 68, 30, 20, 14, 5, 1, 1 ], "bin_edges": [ -0.10248468071222305, -0.09197874367237091, -0.08147279918193817, -0.07096686214208603, -0.06046092137694359, -0.04995498061180115, -0.039449043571949005, -0.028943099081516266, -0.018437162041664124, -0.007931225001811981, 0.002574719488620758, 0.0130806565284729, 0.023586593568325043, 0.03409253805875778, 0.04459848254919052, 0.05510441213846207, 0.0656103566288948, 0.07611630111932755, 0.08662223070859909, 0.09712817519903183, 0.10763411223888397 ] } }, "transformer.layers.16.2.to_q.bias": { "min": -0.16339237987995148, "max": 0.23269455134868622, "mean": 0.00036311167059466243, "std": 0.03283863142132759, "abs_mean": 0.022844718769192696, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.0503872632980347, "elements": 1024, "histogram": { "counts": [ 3, 2, 0, 3, 15, 30, 108, 263, 298, 179, 60, 25, 6, 1, 3, 0, 1, 1, 0, 2 ], "bin_edges": [ -0.16339237987995148, -0.14358803629875183, -0.12378368526697159, -0.10397933423519135, -0.0841749906539917, -0.06437064707279205, -0.04456629604101181, -0.024761945009231567, -0.004957601428031921, 0.014846742153167725, 0.03465108573436737, 0.05445544421672821, 0.07425978779792786, 0.0940641313791275, 0.11386848986148834, 0.1336728185415268, 0.15347717702388763, 0.17328153550624847, 0.19308586418628693, 0.21289022266864777, 0.23269455134868622 ] } }, "transformer.layers.16.2.to_k.weight": { "min": -0.2634328007698059, "max": 0.23954781889915466, "mean": -5.2383133152034134e-05, "std": 0.03390158340334892, "abs_mean": 0.026794826611876488, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 34.71477508544922, "elements": 1048576, "histogram": { "counts": [ 2, 2, 8, 12, 14, 63, 68, 81, 125, 134, 144, 99, 91, 56, 56, 18, 18, 6, 2, 1 ], "bin_edges": [ -0.11294181644916534, -0.10158675163984299, -0.09023168683052063, -0.07887662947177887, -0.06752156466245651, -0.056166499853134155, -0.044811442494392395, -0.03345637768507004, -0.02210131287574768, -0.010746248066425323, 0.0006088167428970337, 0.011963874101638794, 0.023318931460380554, 0.03467400372028351, 0.04602906107902527, 0.05738413333892822, 0.06873919069766998, 0.08009424805641174, 0.0914493203163147, 0.10280437767505646, 0.11415943503379822 ] } }, "transformer.layers.16.2.to_k.bias": { "min": -4.847443580627441, "max": 5.083292484283447, "mean": 0.043835077434778214, "std": 1.227935552597046, "abs_mean": 0.7887641191482544, "sparsity": 0.0, "shape": [ 1024 ], "norm": 39.29978561401367, "elements": 1024, "histogram": { "counts": [ 5, 5, 5, 10, 12, 20, 33, 48, 106, 346, 217, 64, 40, 30, 20, 19, 11, 6, 1, 2 ], "bin_edges": [ -4.847443580627441, -4.350906848907471, -3.854369878768921, -3.35783314704895, -2.8612961769104004, -2.3647594451904297, -1.868222713470459, -1.3716857433319092, -0.8751490116119385, -0.37861204147338867, 0.11792469024658203, 0.6144614219665527, 1.1109981536865234, 1.6075348854064941, 2.104072093963623, 2.6006088256835938, 3.0971455574035645, 3.593682289123535, 4.090219497680664, 4.586755752563477, 5.083292484283447 ] } }, "transformer.layers.16.2.to_v.weight": { "min": -0.24653136730194092, "max": 0.25027644634246826, "mean": 7.213905337266624e-05, "std": 0.04399324953556061, "abs_mean": 0.034520190209150314, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 45.04863739013672, "elements": 1048576, "histogram": { "counts": [ 1, 0, 2, 4, 13, 18, 39, 70, 93, 121, 123, 132, 100, 99, 85, 46, 23, 15, 5, 11 ], "bin_edges": [ -0.1605098992586136, -0.14611366391181946, -0.13171742856502533, -0.1173211857676506, -0.10292494297027588, -0.08852870762348175, -0.07413247227668762, -0.0597362294793129, -0.04533999413251877, -0.030943751335144043, -0.016547515988349915, -0.002151280641555786, 0.012244954705238342, 0.02664119005203247, 0.04103744029998779, 0.05543367564678192, 0.06982991099357605, 0.08422614634037018, 0.0986223965883255, 0.11301861703395844, 0.12741486728191376 ] } }, "transformer.layers.16.2.to_v.bias": { "min": -0.06254159659147263, "max": 0.054444003850221634, "mean": 0.000650427769869566, "std": 0.017183585092425346, "abs_mean": 0.013821554370224476, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5500001311302185, "elements": 1024, "histogram": { "counts": [ 1, 1, 3, 3, 9, 31, 50, 70, 107, 118, 138, 121, 113, 91, 60, 50, 20, 6, 5, 3 ], "bin_edges": [ -0.06254159659147263, -0.05669231712818146, -0.05084303766489029, -0.04499375447630882, -0.039144475013017654, -0.033295195549726486, -0.02744591236114502, -0.02159663289785385, -0.015747353434562683, -0.009898073971271515, -0.004048794507980347, 0.0018004849553108215, 0.007649771869182587, 0.013499051332473755, 0.019348330795764923, 0.02519761025905609, 0.03104688972234726, 0.03689616918563843, 0.042745448648929596, 0.048594728112220764, 0.054444003850221634 ] } }, "transformer.layers.16.2.to_out.0.weight": { "min": -0.28619009256362915, "max": 0.2717132866382599, "mean": -4.993668699171394e-05, "std": 0.04299163073301315, "abs_mean": 0.03383805230259895, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 44.022953033447266, "elements": 1048576, "histogram": { "counts": [ 1, 0, 0, 6, 12, 16, 31, 81, 120, 161, 187, 155, 106, 68, 33, 12, 7, 2, 0, 2 ], "bin_edges": [ -0.18760468065738678, -0.16928882896900177, -0.15097299218177795, -0.13265714049339294, -0.11434129625558853, -0.09602545201778412, -0.07770960032939911, -0.05939376354217529, -0.04107791185379028, -0.022762060165405273, -0.0044462233781814575, 0.013869628310203552, 0.03218547999858856, 0.05050131678581238, 0.0688171535730362, 0.0871330052614212, 0.10544885694980621, 0.12376470863819122, 0.14208056032657623, 0.16039638221263885, 0.17871224880218506 ] } }, "transformer.layers.16.2.to_out.0.bias": { "min": -0.16040603816509247, "max": 0.17025713622570038, "mean": -0.0028844610787928104, "std": 0.05926158279180527, "abs_mean": 0.04866192489862442, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.8976906538009644, "elements": 1024, "histogram": { "counts": [ 2, 10, 15, 33, 49, 52, 99, 86, 117, 95, 87, 100, 81, 52, 53, 30, 22, 13, 3, 1 ], "bin_edges": [ -0.16040603816509247, -0.14387288689613342, -0.12733972072601318, -0.11080656200647354, -0.0942734032869339, -0.07774024456739426, -0.061207085847854614, -0.04467392712831497, -0.02814076840877533, -0.01160760223865509, 0.004925549030303955, 0.021458700299263, 0.03799186646938324, 0.05452503263950348, 0.07105818390846252, 0.08759133517742157, 0.10412450134754181, 0.12065766751766205, 0.1371908336877823, 0.15372397005558014, 0.17025713622570038 ] } }, "transformer.layers.16.3.g": { "min": 0.5196964740753174, "max": 0.9310137629508972, "mean": 0.7133955955505371, "std": 0.03807961940765381, "abs_mean": 0.7133955955505371, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.861127853393555, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 2, 10, 11, 60, 110, 221, 228, 186, 105, 37, 16, 8, 1, 1, 0, 1, 1 ], "bin_edges": [ 0.5196964740753174, 0.5402623414993286, 0.5608282089233398, 0.5813940763473511, 0.6019599437713623, 0.6225258111953735, 0.6430916786193848, 0.663657546043396, 0.6842234134674072, 0.7047892808914185, 0.7253551483154297, 0.7459209561347961, 0.7664868235588074, 0.7870526909828186, 0.8076185584068298, 0.8281844258308411, 0.8487502932548523, 0.8693161606788635, 0.8898820281028748, 0.910447895526886, 0.9310137629508972 ] } }, "transformer.layers.16.4.ff.0.0.weight": { "min": -0.23809659481048584, "max": 0.24939550459384918, "mean": 0.00046480150194838643, "std": 0.04046152904629707, "abs_mean": 0.03219496086239815, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 82.86290740966797, "elements": 4194304, "histogram": { "counts": [ 1, 5, 8, 16, 28, 47, 69, 99, 126, 116, 110, 117, 99, 62, 47, 19, 15, 9, 4, 3 ], "bin_edges": [ -0.12590646743774414, -0.11311858147382736, -0.10033069550991058, -0.08754280209541321, -0.07475491613149643, -0.06196703016757965, -0.049179136753082275, -0.0363912507891655, -0.023603364825248718, -0.01081547886133194, 0.001972407102584839, 0.014760300517082214, 0.02754819393157959, 0.04033607244491577, 0.05312396585941315, 0.06591184437274933, 0.0786997377872467, 0.09148763120174408, 0.10427550971508026, 0.11706340312957764, 0.12985128164291382 ] } }, "transformer.layers.16.4.ff.0.0.bias": { "min": -0.14403879642486572, "max": 0.041449662297964096, "mean": -0.03967723995447159, "std": 0.02051496133208275, "abs_mean": 0.040096282958984375, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.85861873626709, "elements": 4096, "histogram": { "counts": [ 1, 0, 0, 1, 4, 8, 12, 23, 41, 98, 144, 181, 170, 132, 79, 60, 21, 12, 8, 5 ], "bin_edges": [ -0.14403879642486572, -0.1353476494550705, -0.12665650248527527, -0.11796536296606064, -0.10927421599626541, -0.10058306902647018, -0.09189192950725555, -0.08320078253746033, -0.0745096355676651, -0.06581848859786987, -0.057127341628074646, -0.048436202108860016, -0.03974505513906479, -0.031053908169269562, -0.02236276865005493, -0.013671621680259705, -0.0049804747104644775, 0.0037106722593307495, 0.012401819229125977, 0.021092966198921204, 0.029784105718135834 ] } }, "transformer.layers.16.4.ff.2.weight": { "min": -0.5321223735809326, "max": 0.582199215888977, "mean": 5.9441426856210455e-06, "std": 0.04886837303638458, "abs_mean": 0.038299158215522766, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 100.07161712646484, "elements": 4194304, "histogram": { "counts": [ 1, 0, 1, 0, 7, 7, 24, 49, 88, 118, 177, 154, 144, 110, 55, 36, 16, 7, 3, 3 ], "bin_edges": [ -0.21546487510204315, -0.19615697860717773, -0.1768490970134735, -0.1575412005186081, -0.13823330402374268, -0.11892542243003845, -0.09961752593517303, -0.08030964434146881, -0.061001747846603394, -0.041693851351737976, -0.022385969758033752, -0.003078073263168335, 0.016229823231697083, 0.035537704825401306, 0.05484558641910553, 0.07415349781513214, 0.09346137940883636, 0.11276926100254059, 0.1320771723985672, 0.15138505399227142, 0.17069295048713684 ] } }, "transformer.layers.16.4.ff.2.bias": { "min": -0.5183588862419128, "max": 0.49274152517318726, "mean": 0.0023598431143909693, "std": 0.053401440382003784, "abs_mean": 0.03643597662448883, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.7096800804138184, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 1, 1, 10, 59, 279, 449, 173, 18, 5, 1, 0, 1, 0, 1, 1 ], "bin_edges": [ -0.5183588862419128, -0.46780386567115784, -0.41724884510040283, -0.3666938245296478, -0.3161388039588928, -0.2655837833881378, -0.2150287628173828, -0.1644737422466278, -0.1139187216758728, -0.0633637011051178, -0.012808680534362793, 0.0377463698387146, 0.08830136060714722, 0.13885635137557983, 0.18941140174865723, 0.23996645212173462, 0.29052144289016724, 0.34107643365859985, 0.39163148403167725, 0.44218653440475464, 0.49274152517318726 ] } }, "transformer.layers.17.0.weight": { "min": -0.27355626225471497, "max": 0.31514689326286316, "mean": 1.8169534996559378e-06, "std": 0.020052826032042503, "abs_mean": 0.015906326472759247, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 29.038618087768555, "elements": 2097152, "histogram": { "counts": [ 3, 3, 6, 10, 22, 41, 60, 90, 119, 102, 139, 134, 96, 71, 48, 34, 18, 2, 1, 1 ], "bin_edges": [ -0.06576590985059738, -0.059407200664281845, -0.05304849147796631, -0.046689778566360474, -0.04033106938004494, -0.0339723601937294, -0.027613647282123566, -0.02125493809580803, -0.014896228909492493, -0.008537519723176956, -0.0021788105368614197, 0.004179902374744415, 0.01053861528635025, 0.01689732074737549, 0.023256033658981323, 0.02961473912000656, 0.035973452031612396, 0.04233216494321823, 0.04869087040424347, 0.055049583315849304, 0.06140829250216484 ] } }, "transformer.layers.17.1.g": { "min": 0.36634165048599243, "max": 0.7102516293525696, "mean": 0.5930806994438171, "std": 0.04571138322353363, "abs_mean": 0.5930806994438171, "sparsity": 0.0, "shape": [ 1024 ], "norm": 19.03481674194336, "elements": 1024, "histogram": { "counts": [ 2, 2, 1, 9, 2, 5, 7, 13, 24, 36, 54, 100, 140, 187, 198, 113, 65, 33, 7, 2 ], "bin_edges": [ 0.36634165048599243, 0.3835371434688568, 0.4007326364517212, 0.41792815923690796, 0.43512365221977234, 0.4523191452026367, 0.4695146381855011, 0.4867101311683655, 0.5039056539535522, 0.5211011171340942, 0.538296639919281, 0.5554921627044678, 0.5726876258850098, 0.5898831486701965, 0.6070786118507385, 0.6242741346359253, 0.6414695978164673, 0.658665120601654, 0.6758606433868408, 0.6930561065673828, 0.7102516293525696 ] } }, "transformer.layers.17.2.to_q.weight": { "min": -0.21087931096553802, "max": 0.1994456797838211, "mean": 3.07354457618203e-05, "std": 0.034868594259023666, "abs_mean": 0.027569569647312164, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.704959869384766, "elements": 1048576, "histogram": { "counts": [ 3, 11, 16, 24, 59, 104, 133, 127, 149, 122, 95, 72, 43, 25, 5, 6, 3, 2, 0, 1 ], "bin_edges": [ -0.09528622031211853, -0.08350884169340134, -0.07173146307468414, -0.05995407700538635, -0.04817669838666916, -0.036399319767951965, -0.024621933698654175, -0.012844555079936981, -0.0010671764612197876, 0.010710202157497406, 0.0224875807762146, 0.03426496684551239, 0.04604235291481018, 0.05781972408294678, 0.06959711015224457, 0.08137448132038116, 0.09315186738967896, 0.10492925345897675, 0.11670662462711334, 0.12848401069641113, 0.14026139676570892 ] } }, "transformer.layers.17.2.to_q.bias": { "min": -0.1869715005159378, "max": 0.20369935035705566, "mean": 0.0009553421987220645, "std": 0.0314984992146492, "abs_mean": 0.02109266072511673, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.0079233646392822, "elements": 1024, "histogram": { "counts": [ 2, 1, 2, 4, 3, 6, 13, 60, 197, 350, 223, 100, 20, 4, 1, 8, 4, 1, 0, 1 ], "bin_edges": [ -0.1869715005159378, -0.16743795573711395, -0.1479044258594513, -0.12837088108062744, -0.10883733630180359, -0.08930379152297974, -0.06977025419473648, -0.050236716866493225, -0.030703172087669373, -0.01116962730884552, 0.008363917469978333, 0.02789744734764099, 0.047430992126464844, 0.0669645220041275, 0.08649806678295135, 0.10603161156177521, 0.12556515634059906, 0.1450987011194229, 0.16463224589824677, 0.18416579067707062, 0.20369935035705566 ] } }, "transformer.layers.17.2.to_k.weight": { "min": -0.28932973742485046, "max": 0.33943668007850647, "mean": -4.7415778681170195e-05, "std": 0.034589733928442, "abs_mean": 0.027366112917661667, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 35.41941833496094, "elements": 1048576, "histogram": { "counts": [ 1, 1, 6, 4, 13, 34, 43, 71, 96, 119, 119, 107, 138, 90, 68, 42, 22, 16, 8, 2 ], "bin_edges": [ -0.12474610656499863, -0.11343011260032654, -0.10211412608623505, -0.09079813212156296, -0.07948213815689087, -0.06816615164279938, -0.05685015767812729, -0.0455341711640358, -0.03421817719936371, -0.02290218323469162, -0.011586196720600128, -0.00027020275592803955, 0.011045791208744049, 0.02236177772283554, 0.03367776423692703, 0.04499376565217972, 0.05630975216627121, 0.0676257386803627, 0.07894174009561539, 0.09025772660970688, 0.10157372057437897 ] } }, "transformer.layers.17.2.to_k.bias": { "min": -3.8712191581726074, "max": 3.3820998668670654, "mean": 0.014444351196289062, "std": 0.8576834797859192, "abs_mean": 0.6098200678825378, "sparsity": 0.0, "shape": [ 1024 ], "norm": 27.43636131286621, "elements": 1024, "histogram": { "counts": [ 2, 2, 2, 3, 6, 17, 24, 27, 85, 167, 247, 187, 102, 55, 33, 15, 11, 10, 4, 1 ], "bin_edges": [ -3.8712191581726074, -3.5085532665252686, -3.1458873748779297, -2.7832212448120117, -2.420555353164673, -2.057889461517334, -1.695223331451416, -1.3325574398040771, -0.9698915481567383, -0.6072256565093994, -0.24455976486206055, 0.11810636520385742, 0.4807724952697754, 0.8434381484985352, 1.2061042785644531, 1.568769931793213, 1.9314360618591309, 2.294102191925049, 2.6567678451538086, 3.0194339752197266, 3.3820998668670654 ] } }, "transformer.layers.17.2.to_v.weight": { "min": -0.2242382913827896, "max": 0.24965918064117432, "mean": -4.0143440855899826e-06, "std": 0.04223589971661568, "abs_mean": 0.033358603715896606, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 43.24907684326172, "elements": 1048576, "histogram": { "counts": [ 1, 1, 3, 12, 23, 63, 86, 120, 148, 161, 141, 102, 56, 43, 26, 4, 7, 1, 1, 1 ], "bin_edges": [ -0.14771312475204468, -0.13164333999156952, -0.11557355523109436, -0.0995037704706192, -0.08343398571014404, -0.06736420094966888, -0.051294416189193726, -0.03522463142871857, -0.019154846668243408, -0.0030850619077682495, 0.01298472285270691, 0.029054507613182068, 0.04512429237365723, 0.061194077134132385, 0.07726386189460754, 0.0933336466550827, 0.10940343141555786, 0.12547320127487183, 0.14154300093650818, 0.15761280059814453, 0.1736825704574585 ] } }, "transformer.layers.17.2.to_v.bias": { "min": -0.05498581379652023, "max": 0.046769097447395325, "mean": -1.842428173404187e-05, "std": 0.015840334817767143, "abs_mean": 0.012838434427976608, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.506643533706665, "elements": 1024, "histogram": { "counts": [ 1, 1, 5, 2, 20, 34, 50, 76, 100, 112, 135, 95, 98, 115, 76, 41, 20, 11, 4, 4 ], "bin_edges": [ -0.05498581379652023, -0.049898069351911545, -0.044810324907302856, -0.03972257673740387, -0.03463483229279518, -0.029547087848186493, -0.024459341540932655, -0.019371595233678818, -0.01428385078907013, -0.009196106344461441, -0.004108361899852753, 0.0009793862700462341, 0.0060671307146549225, 0.01115487515926361, 0.016242623329162598, 0.021330364048480988, 0.026418112218379974, 0.03150586038827896, 0.03659360110759735, 0.04168134927749634, 0.046769097447395325 ] } }, "transformer.layers.17.2.to_out.0.weight": { "min": -0.2928566634654999, "max": 0.29091376066207886, "mean": -7.36157790015568e-06, "std": 0.04195090010762215, "abs_mean": 0.03324268013238907, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 42.957271575927734, "elements": 1048576, "histogram": { "counts": [ 2, 11, 14, 20, 39, 64, 81, 115, 134, 137, 123, 87, 76, 46, 25, 10, 12, 2, 1, 1 ], "bin_edges": [ -0.12461046129465103, -0.11084295809268951, -0.0970754474401474, -0.08330794423818588, -0.06954044103622437, -0.05577293783426285, -0.042005427181720734, -0.028237923979759216, -0.014470420777797699, -0.0007029175758361816, 0.013064585626125336, 0.02683209627866745, 0.040599606931209564, 0.054367102682590485, 0.0681346133351326, 0.08190210908651352, 0.09566961973905563, 0.10943713039159775, 0.12320462614297867, 0.13697212934494019, 0.1507396250963211 ] } }, "transformer.layers.17.2.to_out.0.bias": { "min": -0.12467863410711288, "max": 0.25901108980178833, "mean": -0.003233879804611206, "std": 0.05313729867339134, "abs_mean": 0.04382557421922684, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.7027106285095215, "elements": 1024, "histogram": { "counts": [ 17, 40, 63, 108, 104, 133, 129, 110, 108, 81, 65, 31, 9, 1, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.12467863410711288, -0.10549414902925491, -0.08630965650081635, -0.06712517142295837, -0.0479406863451004, -0.02875620126724243, -0.009571708738803864, 0.009612776339054108, 0.02879726141691208, 0.04798174649477005, 0.06716623157262802, 0.08635071665048599, 0.10553521662950516, 0.12471970170736313, 0.1439041793346405, 0.16308864951133728, 0.18227314949035645, 0.2014576494693756, 0.2206421196460724, 0.23982661962509155, 0.25901108980178833 ] } }, "transformer.layers.17.3.g": { "min": 0.4561373293399811, "max": 0.8428487777709961, "mean": 0.7054461240768433, "std": 0.03489769622683525, "abs_mean": 0.7054461240768433, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.601856231689453, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 2, 4, 5, 10, 20, 70, 158, 257, 238, 148, 46, 28, 7, 3, 3 ], "bin_edges": [ 0.4561373293399811, 0.47547289729118347, 0.49480846524238586, 0.5141440629959106, 0.533479630947113, 0.5528151988983154, 0.5721507668495178, 0.5914863348007202, 0.6108219027519226, 0.630157470703125, 0.6494930386543274, 0.6688286066055298, 0.6881641745567322, 0.7074997425079346, 0.7268353700637817, 0.7461708784103394, 0.7655065059661865, 0.7848420143127441, 0.8041776418685913, 0.8235131502151489, 0.8428487777709961 ] } }, "transformer.layers.17.4.ff.0.0.weight": { "min": -0.5113534331321716, "max": 0.3484715223312378, "mean": 0.0003426253970246762, "std": 0.04020649194717407, "abs_mean": 0.03195585310459137, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 82.33788299560547, "elements": 4194304, "histogram": { "counts": [ 3, 3, 9, 20, 28, 40, 60, 80, 105, 102, 138, 107, 90, 75, 58, 36, 21, 11, 9, 5 ], "bin_edges": [ -0.12453698366880417, -0.1125292107462883, -0.10052144527435303, -0.08851367235183716, -0.07650589942932129, -0.06449813395738602, -0.05249036103487015, -0.040482595562934875, -0.028474822640419006, -0.016467049717903137, -0.004459284245967865, 0.007548488676548004, 0.019556261599063873, 0.03156403452157974, 0.04357179254293442, 0.05557956546545029, 0.06758733838796616, 0.07959511131048203, 0.0916028842329979, 0.10361064225435257, 0.11561842262744904 ] } }, "transformer.layers.17.4.ff.0.0.bias": { "min": -0.18678922951221466, "max": 0.03952203318476677, "mean": -0.03937358409166336, "std": 0.02131999284029007, "abs_mean": 0.03990429267287254, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.865535259246826, "elements": 4096, "histogram": { "counts": [ 1, 1, 0, 4, 6, 10, 16, 48, 86, 95, 135, 144, 145, 119, 95, 45, 27, 11, 6, 6 ], "bin_edges": [ -0.1281859427690506, -0.12049932032823563, -0.11281269043684006, -0.10512606799602509, -0.09743943810462952, -0.08975281566381454, -0.08206619322299957, -0.074379563331604, -0.06669294089078903, -0.05900631844997406, -0.05131968855857849, -0.04363306611776352, -0.03594644367694855, -0.02825981378555298, -0.020573191344738007, -0.012886561453342438, -0.005199939012527466, 0.002486690878868103, 0.010173305869102478, 0.017859935760498047, 0.025546569377183914 ] } }, "transformer.layers.17.4.ff.2.weight": { "min": -0.5436691045761108, "max": 0.5556817054748535, "mean": -7.17876828275621e-05, "std": 0.05074293538928032, "abs_mean": 0.039733123034238815, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 103.91061401367188, "elements": 4194304, "histogram": { "counts": [ 2, 2, 10, 32, 45, 93, 135, 137, 167, 151, 103, 54, 35, 20, 10, 1, 2, 0, 0, 1 ], "bin_edges": [ -0.16656459867954254, -0.14604578912258148, -0.12552699446678162, -0.10500818490982056, -0.0844893753528595, -0.06397056579589844, -0.043451763689517975, -0.022932961583137512, -0.0024141520261764526, 0.018104657530784607, 0.038623467087745667, 0.05914226174354553, 0.07966107130050659, 0.10017986595630646, 0.12069867551326752, 0.14121748507022858, 0.16173629462718964, 0.1822551041841507, 0.20277391374111176, 0.22329272329807281, 0.24381153285503387 ] } }, "transformer.layers.17.4.ff.2.bias": { "min": -0.5110356211662292, "max": 0.6633175015449524, "mean": 0.002444919664412737, "std": 0.04948664829134941, "abs_mean": 0.03394554927945137, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.5847316980361938, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 2, 8, 128, 497, 325, 36, 1, 1, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.5110356211662292, -0.4523179531097412, -0.39360031485557556, -0.3348826467990875, -0.2761650085449219, -0.21744734048843384, -0.1587296724319458, -0.10001203417778015, -0.041294366121292114, 0.017423272132873535, 0.07614094018936157, 0.1348586082458496, 0.19357627630233765, 0.2522939443588257, 0.31101155281066895, 0.369729220867157, 0.428446888923645, 0.48716455698013306, 0.5458821654319763, 0.6045998930931091, 0.6633175015449524 ] } }, "transformer.layers.18.0.weight": { "min": -0.3323739171028137, "max": 0.2654549777507782, "mean": 3.673961600725306e-06, "std": 0.019390413537621498, "abs_mean": 0.01544923335313797, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 28.079303741455078, "elements": 2097152, "histogram": { "counts": [ 1, 4, 6, 14, 21, 27, 60, 82, 114, 130, 140, 111, 109, 77, 53, 28, 11, 8, 3, 1 ], "bin_edges": [ -0.06729830801486969, -0.06076580286026001, -0.05423329770565033, -0.04770079255104065, -0.04116828739643097, -0.03463578224182129, -0.02810327708721161, -0.02157077193260193, -0.015038266777992249, -0.008505761623382568, -0.001973256468772888, 0.004559248685836792, 0.011091753840446472, 0.017624258995056152, 0.024156764149665833, 0.030689269304275513, 0.03722177445888519, 0.04375427961349487, 0.05028678476810455, 0.05681928992271423, 0.06335178762674332 ] } }, "transformer.layers.18.1.g": { "min": 0.32227811217308044, "max": 0.7648001313209534, "mean": 0.6509190201759338, "std": 0.04508262872695923, "abs_mean": 0.6509190201759338, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.87925910949707, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 0, 1, 1, 3, 3, 6, 10, 17, 27, 70, 126, 202, 219, 215, 82, 14, 2 ], "bin_edges": [ 0.32227811217308044, 0.3444042205810547, 0.36653029918670654, 0.3886564075946808, 0.41078251600265503, 0.4329086244106293, 0.4550347328186035, 0.47716081142425537, 0.4992869198322296, 0.5214130282402039, 0.5435391068458557, 0.5656652450561523, 0.5877913236618042, 0.609917402267456, 0.6320434808731079, 0.6541696190834045, 0.6762957572937012, 0.698421835899353, 0.7205479145050049, 0.7426739931106567, 0.7648001313209534 ] } }, "transformer.layers.18.2.to_q.weight": { "min": -0.24930793046951294, "max": 0.21936655044555664, "mean": -2.44708098762203e-06, "std": 0.036502547562122345, "abs_mean": 0.028754178434610367, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 37.37805938720703, "elements": 1048576, "histogram": { "counts": [ 2, 4, 9, 23, 31, 56, 68, 127, 116, 139, 124, 103, 80, 46, 36, 24, 5, 5, 1, 1 ], "bin_edges": [ -0.11012959480285645, -0.09836138784885406, -0.08659318089485168, -0.0748249739408493, -0.06305676698684692, -0.05128856375813484, -0.03952036052942276, -0.02775215357542038, -0.015983946621418, -0.004215739667415619, 0.0075524672865867615, 0.019320666790008545, 0.031088873744010925, 0.042857080698013306, 0.054625287652015686, 0.06639349460601807, 0.07816170156002045, 0.08992990851402283, 0.10169811546802521, 0.11346632242202759, 0.12523452937602997 ] } }, "transformer.layers.18.2.to_q.bias": { "min": -0.32666686177253723, "max": 0.2868551015853882, "mean": -0.0006774846115149558, "std": 0.03851696848869324, "abs_mean": 0.023683838546276093, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.232131838798523, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 2, 0, 4, 7, 7, 18, 160, 475, 262, 41, 7, 6, 5, 2, 0, 0, 1 ], "bin_edges": [ -0.32666686177253723, -0.2959907650947571, -0.26531466841697693, -0.23463857173919678, -0.20396247506141663, -0.17328637838363647, -0.14261028170585632, -0.11193418502807617, -0.08125808835029602, -0.05058199167251587, -0.019905894994735718, 0.010770201683044434, 0.041446298360824585, 0.07212239503860474, 0.10279849171638489, 0.13347458839416504, 0.1641506850719452, 0.19482681155204773, 0.2255028784275055, 0.25617894530296326, 0.2868551015853882 ] } }, "transformer.layers.18.2.to_k.weight": { "min": -0.3097042739391327, "max": 0.3694048821926117, "mean": 6.485832273028791e-05, "std": 0.03624315932393074, "abs_mean": 0.02854427509009838, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 37.11252975463867, "elements": 1048576, "histogram": { "counts": [ 6, 11, 17, 28, 44, 63, 78, 106, 128, 142, 131, 94, 46, 41, 23, 20, 10, 5, 3, 4 ], "bin_edges": [ -0.10426375269889832, -0.09263941645622253, -0.08101507276296616, -0.06939072906970978, -0.057766392827034, -0.04614205285906792, -0.03451771289110184, -0.02289336919784546, -0.011269032955169678, 0.0003553032875061035, 0.011979646980762482, 0.02360399067401886, 0.03522832691669464, 0.04685266315937042, 0.0584770143032074, 0.07010135054588318, 0.08172568678855896, 0.09335002303123474, 0.10497435927391052, 0.1165987104177475, 0.12822304666042328 ] } }, "transformer.layers.18.2.to_k.bias": { "min": -4.71013069152832, "max": 5.798623085021973, "mean": 0.03792855516076088, "std": 1.41161048412323, "abs_mean": 0.9221487641334534, "sparsity": 0.0, "shape": [ 1024 ], "norm": 45.16578674316406, "elements": 1024, "histogram": { "counts": [ 8, 12, 12, 18, 18, 20, 46, 103, 266, 244, 109, 60, 19, 22, 13, 5, 12, 7, 2, 4 ], "bin_edges": [ -4.71013069152832, -4.184692859649658, -3.659255266189575, -3.133817672729492, -2.60837984085083, -2.082942008972168, -1.557504415512085, -1.032066822052002, -0.5066289901733398, 0.018808841705322266, 0.5442466735839844, 1.0696840286254883, 1.5951218605041504, 2.1205596923828125, 2.6459970474243164, 3.1714348793029785, 3.6968727111816406, 4.2223100662231445, 4.747748374938965, 5.273185729980469, 5.798623085021973 ] } }, "transformer.layers.18.2.to_v.weight": { "min": -0.22137394547462463, "max": 0.20554855465888977, "mean": -7.500727951992303e-05, "std": 0.042491503059864044, "abs_mean": 0.033712420612573624, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 43.510765075683594, "elements": 1048576, "histogram": { "counts": [ 1, 0, 4, 10, 11, 26, 40, 58, 95, 146, 112, 117, 122, 102, 72, 51, 20, 6, 5, 2 ], "bin_edges": [ -0.1555749922990799, -0.14149212837219238, -0.12740924954414368, -0.11332638561725616, -0.09924351423978806, -0.08516064286231995, -0.07107777893543243, -0.056994907557964325, -0.042912036180496216, -0.028829172253608704, -0.014746293425559998, -0.0006634294986724854, 0.013419434428215027, 0.027502313256263733, 0.041585177183151245, 0.05566805601119995, 0.06975091993808746, 0.08383378386497498, 0.09791664779186249, 0.1119995266199112, 0.1260823905467987 ] } }, "transformer.layers.18.2.to_v.bias": { "min": -0.07746972888708115, "max": 0.05126894265413284, "mean": -0.0009250898147001863, "std": 0.016401393339037895, "abs_mean": 0.013242571614682674, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5254228711128235, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 2, 1, 4, 15, 32, 72, 100, 146, 136, 146, 133, 107, 50, 39, 11, 2, 3 ], "bin_edges": [ -0.07746972888708115, -0.07103279232978821, -0.06459586322307587, -0.05815892666578293, -0.05172199383378029, -0.04528506100177765, -0.03884812444448471, -0.03241119161248207, -0.02597425878047943, -0.01953732594847679, -0.013100393116474152, -0.006663456559181213, -0.00022652000188827515, 0.006210409104824066, 0.012647345662117004, 0.019084274768829346, 0.025521211326122284, 0.03195814788341522, 0.038395076990127563, 0.0448320135474205, 0.05126894265413284 ] } }, "transformer.layers.18.2.to_out.0.weight": { "min": -0.33084556460380554, "max": 0.32904890179634094, "mean": -4.916631951346062e-06, "std": 0.042798250913619995, "abs_mean": 0.03404157981276512, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 43.824947357177734, "elements": 1048576, "histogram": { "counts": [ 3, 4, 11, 11, 40, 47, 57, 103, 107, 142, 101, 112, 98, 80, 43, 19, 12, 7, 1, 2 ], "bin_edges": [ -0.1374271959066391, -0.12343340367078781, -0.10943961143493652, -0.09544582664966583, -0.08145203441381454, -0.06745824217796326, -0.053464457392692566, -0.03947066515684128, -0.02547687292098999, -0.0114830881357193, 0.0025107115507125854, 0.016504496335983276, 0.030498281121253967, 0.04449208080768585, 0.05848586559295654, 0.07247966527938843, 0.08647345006465912, 0.10046723484992981, 0.1144610196352005, 0.12845481932163239, 0.14244860410690308 ] } }, "transformer.layers.18.2.to_out.0.bias": { "min": -0.2845572233200073, "max": 0.11143017560243607, "mean": -0.0012043914757668972, "std": 0.04699280112981796, "abs_mean": 0.03843585401773453, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.5035291910171509, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 4, 25, 53, 86, 139, 138, 157, 137, 117, 84, 45, 14 ], "bin_edges": [ -0.2845572233200073, -0.26475784182548523, -0.24495849013328552, -0.22515910863876343, -0.20535974204540253, -0.18556037545204163, -0.16576099395751953, -0.14596162736415863, -0.12616226077079773, -0.10636289417743683, -0.08656352758407593, -0.06676414608955383, -0.04696477949619293, -0.02716541290283203, -0.0073660314083099365, 0.01243332028388977, 0.032232701778411865, 0.05203208327293396, 0.07183143496513367, 0.09163081645965576, 0.11143017560243607 ] } }, "transformer.layers.18.3.g": { "min": 0.48666608333587646, "max": 0.885034441947937, "mean": 0.7373895049095154, "std": 0.03794779255986214, "abs_mean": 0.7373895049095154, "sparsity": 0.0, "shape": [ 1024 ], "norm": 23.62765884399414, "elements": 1024, "histogram": { "counts": [ 2, 1, 2, 2, 2, 1, 2, 3, 6, 28, 83, 195, 270, 216, 119, 41, 16, 4, 3, 4 ], "bin_edges": [ 0.48666608333587646, 0.5065845251083374, 0.5265029072761536, 0.5464213490486145, 0.5663397312164307, 0.5862581729888916, 0.6061766147613525, 0.6260949969291687, 0.6460134387016296, 0.6659318208694458, 0.6858502626419067, 0.7057687044143677, 0.7256870865821838, 0.74560546875, 0.7655239105224609, 0.7854423522949219, 0.8053607940673828, 0.825279176235199, 0.8451975584030151, 0.8651160001754761, 0.885034441947937 ] } }, "transformer.layers.18.4.ff.0.0.weight": { "min": -0.3611343502998352, "max": 0.27392831444740295, "mean": 5.1206770876888186e-05, "std": 0.04065323248505592, "abs_mean": 0.032221995294094086, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.24979400634766, "elements": 4194304, "histogram": { "counts": [ 2, 2, 11, 22, 24, 53, 90, 105, 121, 146, 116, 111, 73, 58, 36, 18, 10, 1, 0, 1 ], "bin_edges": [ -0.13471700251102448, -0.1202903464436531, -0.10586368292570114, -0.09143702685832977, -0.07701036334037781, -0.06258370727300644, -0.04815705120563507, -0.033730387687683105, -0.019303731620311737, -0.004877075552940369, 0.009549587965011597, 0.023976251482963562, 0.038402900099754333, 0.0528295636177063, 0.06725622713565826, 0.08168287575244904, 0.096109539270401, 0.11053620278835297, 0.12496285140514374, 0.1393895298242569, 0.15381617844104767 ] } }, "transformer.layers.18.4.ff.0.0.bias": { "min": -0.2472306787967682, "max": 0.046531591564416885, "mean": -0.03925502672791481, "std": 0.023223698139190674, "abs_mean": 0.039705656468868256, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.9189653396606445, "elements": 4096, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 3, 1, 0, 9, 22, 49, 155, 251, 220, 190, 71, 20, 8 ], "bin_edges": [ -0.2472306787967682, -0.2334253042936325, -0.21961992979049683, -0.20581455528736115, -0.19200918078422546, -0.17820380628108978, -0.1643984317779541, -0.15059305727481842, -0.13678768277168274, -0.12298230826854706, -0.10917693376541138, -0.0953715592622757, -0.08156618475914001, -0.06776081025600433, -0.05395543575286865, -0.04015006124973297, -0.02634468674659729, -0.012539312243461609, 0.0012660622596740723, 0.01507142186164856, 0.02887682616710663 ] } }, "transformer.layers.18.4.ff.2.weight": { "min": -0.62546706199646, "max": 0.596234142780304, "mean": -6.186795508256182e-05, "std": 0.0531260222196579, "abs_mean": 0.04139447957277298, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 108.79020690917969, "elements": 4194304, "histogram": { "counts": [ 1, 1, 3, 10, 29, 69, 135, 217, 218, 178, 77, 42, 12, 5, 1, 0, 1, 0, 0, 1 ], "bin_edges": [ -0.22725528478622437, -0.1996065378189087, -0.17195777595043182, -0.14430902898311615, -0.11666027456521988, -0.08901152014732361, -0.061362773180007935, -0.03371401131153107, -0.006065264344215393, 0.02158348262310028, 0.04923224449157715, 0.07688099145889282, 0.1045297384262085, 0.13217848539352417, 0.15982726216316223, 0.1874760091304779, 0.21512475609779358, 0.24277350306510925, 0.2704222500324249, 0.298071026802063, 0.32571980357170105 ] } }, "transformer.layers.18.4.ff.2.bias": { "min": -0.7086492776870728, "max": 0.2654070556163788, "mean": 0.0009191531571559608, "std": 0.05119417607784271, "abs_mean": 0.035680606961250305, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.6376776695251465, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 40, 215, 441, 242, 42, 10, 1, 2 ], "bin_edges": [ -0.7086492776870728, -0.6599464416503906, -0.6112436652183533, -0.5625408291816711, -0.513837993144989, -0.4651351869106293, -0.41643238067626953, -0.3677295446395874, -0.31902673840522766, -0.2703239321708679, -0.2216210961341858, -0.17291826009750366, -0.12421548366546631, -0.07551264762878418, -0.02680981159210205, 0.021892964839935303, 0.07059580087661743, 0.11929863691329956, 0.16800141334533691, 0.21670424938201904, 0.2654070556163788 ] } }, "transformer.layers.19.0.weight": { "min": -0.34331265091896057, "max": 0.30340248346328735, "mean": 2.3374013835564256e-07, "std": 0.019139692187309265, "abs_mean": 0.015232603996992111, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 27.716217041015625, "elements": 2097152, "histogram": { "counts": [ 7, 10, 21, 25, 45, 68, 64, 107, 120, 115, 105, 99, 69, 56, 38, 30, 14, 4, 1, 2 ], "bin_edges": [ -0.05381210148334503, -0.04800749570131302, -0.04220288619399071, -0.036398276686668396, -0.030593670904636383, -0.02478906325995922, -0.01898445561528206, -0.013179846107959747, -0.007375240325927734, -0.0015706345438957214, 0.00423397496342659, 0.010038584470748901, 0.015843190252780914, 0.021647796034812927, 0.027452409267425537, 0.03325701504945755, 0.03906162083148956, 0.044866226613521576, 0.05067083239555359, 0.0564754456281662, 0.06228005141019821 ] } }, "transformer.layers.19.1.g": { "min": 0.3500247001647949, "max": 0.7813002467155457, "mean": 0.6387312412261963, "std": 0.048984214663505554, "abs_mean": 0.6387312412261963, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.499359130859375, "elements": 1024, "histogram": { "counts": [ 2, 0, 0, 0, 1, 6, 12, 14, 10, 24, 53, 83, 164, 193, 193, 160, 67, 13, 4, 1 ], "bin_edges": [ 0.3500247001647949, 0.37158846855163574, 0.39315226674079895, 0.41471603512763977, 0.4362798035144806, 0.4578436017036438, 0.4794073700904846, 0.5009711384773254, 0.5225349068641663, 0.5440987348556519, 0.5656625032424927, 0.5872262716293335, 0.6087900400161743, 0.6303538084030151, 0.651917576789856, 0.6734813451766968, 0.6950451135635376, 0.7166088819503784, 0.738172709941864, 0.7597364783287048, 0.7813002467155457 ] } }, "transformer.layers.19.2.to_q.weight": { "min": -0.20559599995613098, "max": 0.20657846331596375, "mean": -5.995870742481202e-05, "std": 0.03769858554005623, "abs_mean": 0.02982865273952484, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.60289001464844, "elements": 1048576, "histogram": { "counts": [ 1, 3, 4, 4, 18, 40, 60, 94, 114, 145, 151, 133, 99, 67, 33, 18, 10, 4, 1, 1 ], "bin_edges": [ -0.13801053166389465, -0.12424114346504211, -0.11047175526618958, -0.09670236706733704, -0.0829329788684845, -0.06916359066963196, -0.05539420247077942, -0.04162481427192688, -0.02785542607307434, -0.014086037874221802, -0.0003166496753692627, 0.013452738523483276, 0.027222126722335815, 0.040991514921188354, 0.054760903120040894, 0.06853029131889343, 0.08229967951774597, 0.09606906771659851, 0.10983845591545105, 0.12360784411430359, 0.13737723231315613 ] } }, "transformer.layers.19.2.to_q.bias": { "min": -0.25827330350875854, "max": 0.26797717809677124, "mean": -0.00040583324152976274, "std": 0.04458905756473541, "abs_mean": 0.030639849603176117, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.426212191581726, "elements": 1024, "histogram": { "counts": [ 1, 3, 1, 1, 2, 9, 17, 53, 173, 308, 262, 110, 34, 5, 10, 3, 4, 3, 0, 1 ], "bin_edges": [ -0.25827330350875854, -0.23196077346801758, -0.2056482583284378, -0.17933572828769684, -0.15302321314811707, -0.1267106831073761, -0.10039815306663513, -0.07408563792705536, -0.04777310788631439, -0.021460577845573425, 0.004851937294006348, 0.031164467334747314, 0.05747699737548828, 0.08378952741622925, 0.11010202765464783, 0.1364145576953888, 0.16272708773612976, 0.18903961777687073, 0.2153521478176117, 0.24166464805603027, 0.26797717809677124 ] } }, "transformer.layers.19.2.to_k.weight": { "min": -0.35375165939331055, "max": 0.32213273644447327, "mean": -7.335219379456248e-06, "std": 0.03720685839653015, "abs_mean": 0.029421523213386536, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 38.09929656982422, "elements": 1048576, "histogram": { "counts": [ 3, 2, 5, 11, 25, 36, 54, 93, 104, 120, 141, 134, 95, 66, 43, 30, 16, 13, 5, 4 ], "bin_edges": [ -0.1217818558216095, -0.11000676453113556, -0.09823167324066162, -0.08645657449960709, -0.07468148320913315, -0.06290639191865921, -0.051131293177604675, -0.03935620188713074, -0.0275811105966568, -0.01580601930618286, -0.004030928015708923, 0.007744163274765015, 0.019519269466400146, 0.031294360756874084, 0.04306945204734802, 0.05484454333782196, 0.0666196346282959, 0.07839472591876984, 0.09016981720924377, 0.10194490849971771, 0.11371999979019165 ] } }, "transformer.layers.19.2.to_k.bias": { "min": -5.253459930419922, "max": 4.198183536529541, "mean": -0.0263908039778471, "std": 1.0056793689727783, "abs_mean": 0.6567726135253906, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.17710876464844, "elements": 1024, "histogram": { "counts": [ 1, 1, 3, 2, 9, 10, 13, 32, 36, 79, 265, 316, 129, 31, 27, 19, 14, 10, 2, 1 ], "bin_edges": [ -5.253459930419922, -4.780877590179443, -4.308295726776123, -3.8357133865356445, -3.363131284713745, -2.8905491828918457, -2.417966842651367, -1.9453847408294678, -1.4728026390075684, -1.0002202987670898, -0.5276384353637695, -0.055056095123291016, 0.4175262451171875, 0.8901081085205078, 1.3626904487609863, 1.8352723121643066, 2.307854652404785, 2.7804365158081055, 3.253019332885742, 3.7256011962890625, 4.198183536529541 ] } }, "transformer.layers.19.2.to_v.weight": { "min": -0.23853513598442078, "max": 0.24350698292255402, "mean": -2.5575776817277074e-05, "std": 0.04321583732962608, "abs_mean": 0.03416847437620163, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 44.25249481201172, "elements": 1048576, "histogram": { "counts": [ 1, 4, 9, 18, 30, 48, 82, 91, 120, 131, 129, 115, 84, 62, 41, 13, 13, 5, 1, 3 ], "bin_edges": [ -0.1413203775882721, -0.1268763244152069, -0.11243227869272232, -0.09798823297023773, -0.08354417979717255, -0.06910013407468796, -0.05465608835220337, -0.040212035179138184, -0.025767989456653595, -0.011323943734169006, 0.003120109438896179, 0.017564162611961365, 0.032008200883865356, 0.04645225405693054, 0.06089630722999573, 0.07534034550189972, 0.0897843986749649, 0.10422845184803009, 0.11867249011993408, 0.13311654329299927, 0.14756059646606445 ] } }, "transformer.layers.19.2.to_v.bias": { "min": -0.06232254579663277, "max": 0.05653427913784981, "mean": 0.0003516775614116341, "std": 0.014141896739602089, "abs_mean": 0.011390534229576588, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4524596631526947, "elements": 1024, "histogram": { "counts": [ 1, 0, 3, 1, 6, 13, 35, 90, 109, 148, 154, 146, 147, 87, 38, 15, 4, 1, 1, 1 ], "bin_edges": [ -0.06232254579663277, -0.05637970566749573, -0.05043686181306839, -0.04449402168393135, -0.03855118155479431, -0.03260834142565727, -0.026665497571229935, -0.020722657442092896, -0.014779817312955856, -0.008836977183818817, -0.002894137054681778, 0.0030487067997455597, 0.008991550654172897, 0.014934387058019638, 0.020877230912446976, 0.026820067316293716, 0.032762911170721054, 0.03870575502514839, 0.04464859142899513, 0.05059143528342247, 0.05653427913784981 ] } }, "transformer.layers.19.2.to_out.0.weight": { "min": -0.437425822019577, "max": 0.3736904561519623, "mean": 1.4616346561524551e-05, "std": 0.044127896428108215, "abs_mean": 0.03491860628128052, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 45.18648910522461, "elements": 1048576, "histogram": { "counts": [ 3, 4, 3, 21, 23, 48, 62, 87, 109, 114, 128, 127, 111, 53, 50, 24, 14, 13, 3, 3 ], "bin_edges": [ -0.1402941793203354, -0.12645918130874634, -0.1126241683959961, -0.09878916293382645, -0.0849541574716568, -0.07111915200948715, -0.057284146547317505, -0.04344914108514786, -0.02961413562297821, -0.015779130160808563, -0.001944124698638916, 0.011890873312950134, 0.02572588622570038, 0.03956089913845062, 0.05339589715003967, 0.06723089516162872, 0.08106590807437897, 0.09490092098712921, 0.10873591899871826, 0.12257091701030731, 0.13640592992305756 ] } }, "transformer.layers.19.2.to_out.0.bias": { "min": -0.09596914798021317, "max": 0.17601557075977325, "mean": -0.0006586366798728704, "std": 0.03512872755527496, "abs_mean": 0.028804786503314972, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.1237679719924927, "elements": 1024, "histogram": { "counts": [ 4, 15, 38, 88, 99, 107, 136, 149, 118, 125, 59, 46, 12, 3, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.09596914798021317, -0.08236990869045258, -0.06877067685127258, -0.05517143756151199, -0.0415722019970417, -0.02797296643257141, -0.014373727142810822, -0.0007744953036308289, 0.01282474398612976, 0.02642398327589035, 0.04002321511507034, 0.053622446954250336, 0.06722169369459152, 0.08082092553377151, 0.09442015737295151, 0.1080194041132927, 0.12161863595247269, 0.13521787524223328, 0.14881712198257446, 0.16241633892059326, 0.17601557075977325 ] } }, "transformer.layers.19.3.g": { "min": 0.42178472876548767, "max": 1.06712007522583, "mean": 0.7484290599822998, "std": 0.04182668402791023, "abs_mean": 0.7484290599822998, "sparsity": 0.0, "shape": [ 1024 ], "norm": 23.987064361572266, "elements": 1024, "histogram": { "counts": [ 1, 1, 4, 1, 3, 3, 4, 22, 73, 291, 408, 157, 24, 6, 0, 1, 0, 0, 0, 1 ], "bin_edges": [ 0.42178472876548767, 0.45405149459838867, 0.4863182604312897, 0.5185850262641907, 0.5508518218994141, 0.5831185579299927, 0.6153852939605713, 0.6476520895957947, 0.6799188852310181, 0.7121856212615967, 0.7444523572921753, 0.7767191529273987, 0.8089859485626221, 0.8412526845932007, 0.8735194206237793, 0.9057862162590027, 0.9380530118942261, 0.9703197479248047, 1.0025864839553833, 1.0348533391952515, 1.06712007522583 ] } }, "transformer.layers.19.4.ff.0.0.weight": { "min": -0.26583534479141235, "max": 0.29665902256965637, "mean": -7.891673885751516e-05, "std": 0.04081389307975769, "abs_mean": 0.032326411455869675, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.57918548583984, "elements": 4194304, "histogram": { "counts": [ 1, 4, 8, 13, 21, 53, 78, 130, 144, 145, 138, 111, 72, 41, 20, 15, 4, 1, 0, 1 ], "bin_edges": [ -0.1381877362728119, -0.12331786006689072, -0.10844798386096954, -0.09357810765504837, -0.0787082314491272, -0.06383835524320602, -0.04896847903728485, -0.03409860283136368, -0.019228726625442505, -0.004358857870101929, 0.010511025786399841, 0.02538090944290161, 0.04025077819824219, 0.055120646953582764, 0.06999053061008453, 0.0848604142665863, 0.09973028302192688, 0.11460015177726746, 0.12947002053260803, 0.144339919090271, 0.15920978784561157 ] } }, "transformer.layers.19.4.ff.0.0.bias": { "min": -0.18455219268798828, "max": 0.043140046298503876, "mean": -0.03679502755403519, "std": 0.0255513247102499, "abs_mean": 0.037825148552656174, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.866874933242798, "elements": 4096, "histogram": { "counts": [ 3, 1, 4, 4, 4, 6, 25, 28, 45, 79, 129, 176, 166, 140, 108, 45, 25, 7, 3, 2 ], "bin_edges": [ -0.1534748673439026, -0.1436709612607956, -0.1338670551776886, -0.12406314164400101, -0.11425923556089401, -0.10445532947778702, -0.09465141594409943, -0.08484750986099243, -0.07504360377788544, -0.06523969769477844, -0.05543579161167145, -0.045631878077983856, -0.03582797199487686, -0.02602405846118927, -0.016220152378082275, -0.006416246294975281, 0.003387659788131714, 0.013191565871238708, 0.022995471954345703, 0.0327993780374527, 0.042603280395269394 ] } }, "transformer.layers.19.4.ff.2.weight": { "min": -0.45756417512893677, "max": 0.4861648976802826, "mean": 4.3982381612295285e-05, "std": 0.05422103777527809, "abs_mean": 0.042265694588422775, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 111.03323364257812, "elements": 4194304, "histogram": { "counts": [ 1, 2, 0, 4, 10, 17, 55, 56, 129, 151, 178, 163, 111, 72, 34, 10, 5, 0, 1, 1 ], "bin_edges": [ -0.23315100371837616, -0.21043646335601807, -0.18772193789482117, -0.16500739753246307, -0.14229285717010498, -0.11957833170890808, -0.09686379134654999, -0.07414926588535309, -0.051434725522994995, -0.028720185160636902, -0.0060056596994400024, 0.01670888066291809, 0.039423421025276184, 0.062137946486473083, 0.08485247194766998, 0.10756702721118927, 0.13028155267238617, 0.15299607813358307, 0.17571063339710236, 0.19842515885829926, 0.22113966941833496 ] } }, "transformer.layers.19.4.ff.2.bias": { "min": -0.2858409285545349, "max": 0.5508930087089539, "mean": -0.0008807203266769648, "std": 0.047792647033929825, "abs_mean": 0.033110618591308594, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.5288777351379395, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 1, 2, 17, 54, 154, 306, 297, 111, 47, 7, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.2858409285545349, -0.253815621137619, -0.22179031372070312, -0.18976500630378723, -0.15773969888687134, -0.12571439146995544, -0.09368908405303955, -0.06166377663612366, -0.029638469219207764, 0.00238683819770813, 0.03441214561462402, 0.06643745303153992, 0.09846276044845581, 0.1304880678653717, 0.1625133752822876, 0.1945386826992035, 0.22656399011611938, 0.25858932733535767, 0.29061460494995117, 0.3226398825645447, 0.35466521978378296 ] } }, "transformer.layers.20.0.weight": { "min": -0.2925868332386017, "max": 0.32265621423721313, "mean": 6.008186119288439e-06, "std": 0.0199727825820446, "abs_mean": 0.015888523310422897, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 28.922727584838867, "elements": 2097152, "histogram": { "counts": [ 9, 12, 45, 61, 89, 149, 146, 141, 145, 96, 55, 29, 15, 5, 1, 1, 0, 0, 0, 1 ], "bin_edges": [ -0.054424017667770386, -0.04656292125582695, -0.038701824843883514, -0.030840732157230377, -0.02297963574528694, -0.015118539333343506, -0.007257446646690369, 0.000603649765253067, 0.008464746177196503, 0.01632583886384964, 0.024186939001083374, 0.03204803168773651, 0.03990912437438965, 0.04777022451162338, 0.05563131719827652, 0.06349241733551025, 0.07135351002216339, 0.07921460270881653, 0.08707569539546967, 0.094936802983284, 0.10279788821935654 ] } }, "transformer.layers.20.1.g": { "min": 0.2913132309913635, "max": 0.7585903406143188, "mean": 0.6507112979888916, "std": 0.05193017050623894, "abs_mean": 0.6507112979888916, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.88890266418457, "elements": 1024, "histogram": { "counts": [ 2, 0, 0, 0, 1, 2, 3, 4, 10, 12, 15, 20, 40, 77, 147, 229, 251, 132, 46, 9 ], "bin_edges": [ 0.2913132309913635, 0.31467708945274353, 0.33804094791412354, 0.36140477657318115, 0.38476866483688354, 0.40813249349594116, 0.43149635195732117, 0.45486021041870117, 0.4782240688800812, 0.5015879273414612, 0.5249518156051636, 0.5483156442642212, 0.5716794729232788, 0.5950433015823364, 0.6184071898460388, 0.6417710781097412, 0.6651349067687988, 0.6884987354278564, 0.7118626236915588, 0.7352265119552612, 0.7585903406143188 ] } }, "transformer.layers.20.2.to_q.weight": { "min": -0.24352194368839264, "max": 0.26151588559150696, "mean": -5.6967542150232475e-06, "std": 0.03961416333913803, "abs_mean": 0.03143656626343727, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.564388275146484, "elements": 1048576, "histogram": { "counts": [ 3, 2, 2, 5, 10, 28, 41, 73, 99, 142, 174, 144, 94, 80, 57, 22, 15, 2, 3, 4 ], "bin_edges": [ -0.15721261501312256, -0.14256204664707184, -0.1279114931821823, -0.11326092481613159, -0.09861036390066147, -0.08395980298519135, -0.06930923461914062, -0.0546586737036705, -0.04000811278820038, -0.025357544422149658, -0.010706990957260132, 0.003943577408790588, 0.01859414577484131, 0.033244699239730835, 0.047895267605781555, 0.06254582107067108, 0.0771963894367218, 0.09184695780277252, 0.10649752616882324, 0.12114807963371277, 0.1357986181974411 ] } }, "transformer.layers.20.2.to_q.bias": { "min": -0.26712363958358765, "max": 0.19983239471912384, "mean": -0.0008771903812885284, "std": 0.0517287477850914, "abs_mean": 0.03703365474939346, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.6547496318817139, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 3, 6, 9, 10, 24, 49, 102, 196, 252, 165, 92, 42, 21, 12, 2, 10, 4 ], "bin_edges": [ -0.26712363958358765, -0.2437758445739746, -0.22042803466320038, -0.19708023965358734, -0.1737324297428131, -0.15038463473320007, -0.12703683972358704, -0.1036890298128128, -0.08034123480319977, -0.05699343979358673, -0.0336456298828125, -0.010297834873199463, 0.013049960136413574, 0.03639775514602661, 0.059745579957962036, 0.08309337496757507, 0.10644116997718811, 0.12978896498680115, 0.15313675999641418, 0.1764845848083496, 0.19983239471912384 ] } }, "transformer.layers.20.2.to_k.weight": { "min": -0.2718246877193451, "max": 0.25335949659347534, "mean": 5.239124220679514e-06, "std": 0.03871086984872818, "abs_mean": 0.030697766691446304, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.639469146728516, "elements": 1048576, "histogram": { "counts": [ 2, 2, 7, 13, 20, 60, 71, 100, 142, 121, 141, 116, 84, 55, 31, 19, 9, 6, 0, 1 ], "bin_edges": [ -0.1246657595038414, -0.11169023811817169, -0.09871471673250198, -0.08573919534683228, -0.07276367396116257, -0.05978815257549286, -0.04681263118982315, -0.03383710980415344, -0.020861588418483734, -0.007886067032814026, 0.005089454352855682, 0.018064983189105988, 0.0310404971241951, 0.04401601105928421, 0.056991539895534515, 0.06996706873178482, 0.08294258266687393, 0.09591809660196304, 0.10889362543821335, 0.12186915427446365, 0.13484467566013336 ] } }, "transformer.layers.20.2.to_k.bias": { "min": -12.94522476196289, "max": 15.922240257263184, "mean": 0.03318937495350838, "std": 1.9867888689041138, "abs_mean": 1.0522326231002808, "sparsity": 0.0, "shape": [ 1024 ], "norm": 63.5550651550293, "elements": 1024, "histogram": { "counts": [ 1, 2, 2, 4, 6, 5, 17, 62, 421, 384, 56, 15, 9, 5, 2, 6, 2, 0, 0, 1 ], "bin_edges": [ -12.94522476196289, -11.501851081848145, -10.058478355407715, -8.615104675292969, -7.171731472015381, -5.728358268737793, -4.284984588623047, -2.841611862182617, -1.398238182067871, 0.045135498046875, 1.4885082244873047, 2.931881904602051, 4.375255584716797, 5.818628311157227, 7.262001037597656, 8.705375671386719, 10.148748397827148, 11.592121124267578, 13.03549575805664, 14.47886848449707, 15.922240257263184 ] } }, "transformer.layers.20.2.to_v.weight": { "min": -0.20649555325508118, "max": 0.22559243440628052, "mean": -7.256461685756221e-05, "std": 0.040558841079473495, "abs_mean": 0.03210737928748131, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.531700134277344, "elements": 1048576, "histogram": { "counts": [ 3, 2, 1, 8, 18, 38, 64, 99, 114, 132, 137, 112, 109, 82, 42, 22, 6, 5, 4, 2 ], "bin_edges": [ -0.14026805758476257, -0.12651239335536957, -0.11275672912597656, -0.09900107234716415, -0.08524540811777115, -0.07148974388837814, -0.057734087109565735, -0.04397842288017273, -0.030222758650779724, -0.01646709442138672, -0.0027114301919937134, 0.011044234037399292, 0.024799883365631104, 0.03855554759502411, 0.052311211824417114, 0.06606687605381012, 0.07982254028320312, 0.09357820451259613, 0.10733386874198914, 0.12108951807022095, 0.13484519720077515 ] } }, "transformer.layers.20.2.to_v.bias": { "min": -0.06932304799556732, "max": 0.06304260343313217, "mean": 0.0001579949603183195, "std": 0.014740646816790104, "abs_mean": 0.011740190908312798, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.47149741649627686, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 0, 1, 7, 25, 99, 135, 159, 159, 157, 137, 71, 26, 10, 5, 1, 2, 3 ], "bin_edges": [ -0.06932304799556732, -0.06270476430654526, -0.05608648434281349, -0.04946820065379143, -0.04284992069005966, -0.0362316370010376, -0.029613353312015533, -0.022995073348283768, -0.016376789659261703, -0.00975850597023964, -0.0031402260065078735, 0.0034780576825141907, 0.010096341371536255, 0.01671462506055832, 0.023332901298999786, 0.02995118498802185, 0.036569468677043915, 0.04318775236606598, 0.04980603605508804, 0.05642431974411011, 0.06304260343313217 ] } }, "transformer.layers.20.2.to_out.0.weight": { "min": -0.4653640687465668, "max": 0.3200652003288269, "mean": 1.952598540810868e-05, "std": 0.04059439152479172, "abs_mean": 0.032202161848545074, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.56819152832031, "elements": 1048576, "histogram": { "counts": [ 1, 1, 4, 0, 11, 21, 27, 66, 88, 111, 134, 119, 140, 93, 64, 61, 29, 15, 10, 5 ], "bin_edges": [ -0.15654049813747406, -0.1427799016237259, -0.12901930510997772, -0.11525871604681015, -0.10149811953306198, -0.08773752301931381, -0.07397693395614624, -0.06021633744239807, -0.0464557409286499, -0.03269514441490173, -0.018934547901153564, -0.0051739513874053955, 0.00858663022518158, 0.02234722673892975, 0.03610782325267792, 0.049868419766426086, 0.06362901628017426, 0.07738961279392242, 0.0911502093076706, 0.10491080582141876, 0.11867139488458633 ] } }, "transformer.layers.20.2.to_out.0.bias": { "min": -0.06398282200098038, "max": 0.11537733674049377, "mean": 0.0011978133115917444, "std": 0.02469516545534134, "abs_mean": 0.019984934478998184, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.7907887697219849, "elements": 1024, "histogram": { "counts": [ 5, 23, 26, 66, 101, 118, 134, 139, 117, 103, 81, 46, 24, 11, 3, 2, 0, 0, 0, 1 ], "bin_edges": [ -0.06398282200098038, -0.05501481518149376, -0.04604680836200714, -0.037078797817230225, -0.028110790997743607, -0.01914278417825699, -0.010174773633480072, -0.0012067705392837524, 0.007761240005493164, 0.01672925055027008, 0.0256972536444664, 0.03466526418924332, 0.04363327473402023, 0.05260127782821655, 0.06156928092241287, 0.07053729146718979, 0.0795053020119667, 0.08847331255674362, 0.09744132310152054, 0.10640931874513626, 0.11537733674049377 ] } }, "transformer.layers.20.3.g": { "min": 0.3749999403953552, "max": 0.9300609230995178, "mean": 0.7510109543800354, "std": 0.040018972009420395, "abs_mean": 0.7510109543800354, "sparsity": 0.0, "shape": [ 1024 ], "norm": 24.0664119720459, "elements": 1024, "histogram": { "counts": [ 1, 0, 1, 0, 4, 1, 4, 1, 4, 6, 11, 21, 149, 467, 262, 53, 8, 5, 0, 2 ], "bin_edges": [ 0.3749999403953552, 0.40275299549102783, 0.43050605058670044, 0.45825910568237305, 0.48601213097572327, 0.5137652158737183, 0.5415182113647461, 0.5692712664604187, 0.5970243215560913, 0.6247773766517639, 0.6525304317474365, 0.6802834868431091, 0.7080365419387817, 0.7357895970344543, 0.763542652130127, 0.7912956476211548, 0.8190487623214722, 0.8468017578125, 0.8745548725128174, 0.9023078680038452, 0.9300609230995178 ] } }, "transformer.layers.20.4.ff.0.0.weight": { "min": -0.27868181467056274, "max": 0.27277180552482605, "mean": -0.00016834630514495075, "std": 0.041004978120326996, "abs_mean": 0.03246169537305832, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 83.97103881835938, "elements": 4194304, "histogram": { "counts": [ 4, 6, 8, 17, 36, 62, 93, 121, 129, 139, 115, 111, 56, 45, 27, 16, 8, 3, 3, 1 ], "bin_edges": [ -0.1280173808336258, -0.11423726379871368, -0.10045714676380157, -0.08667702972888947, -0.07289691269397736, -0.05911679565906525, -0.04533667862415314, -0.03155656158924103, -0.01777644455432892, -0.003996327519416809, 0.0097837895154953, 0.02356390655040741, 0.03734402358531952, 0.05112414062023163, 0.06490425765514374, 0.07868437469005585, 0.09246449172496796, 0.10624460875988007, 0.12002472579479218, 0.13380484282970428, 0.1475849598646164 ] } }, "transformer.layers.20.4.ff.0.0.bias": { "min": -0.19812321662902832, "max": 0.05135354399681091, "mean": -0.032012395560741425, "std": 0.025048717856407166, "abs_mean": 0.03351156413555145, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.6013293266296387, "elements": 4096, "histogram": { "counts": [ 1, 1, 0, 2, 1, 2, 4, 6, 11, 22, 53, 116, 184, 203, 192, 129, 53, 16, 2, 2 ], "bin_edges": [ -0.19812321662902832, -0.185712069272995, -0.17330093681812286, -0.16088978946208954, -0.1484786570072174, -0.13606750965118408, -0.12365636229515076, -0.11124522238969803, -0.0988340824842453, -0.08642294257879257, -0.07401180267333984, -0.06160065531730652, -0.04918950796127319, -0.03677837550640106, -0.024367228150367737, -0.011956095695495605, 0.0004550516605377197, 0.012866199016571045, 0.025277331471443176, 0.0376884788274765, 0.05009962618350983 ] } }, "transformer.layers.20.4.ff.2.weight": { "min": -0.65754234790802, "max": 0.5349372029304504, "mean": -5.049940591561608e-05, "std": 0.052857208997011185, "abs_mean": 0.04115595668554306, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 108.23979187011719, "elements": 4194304, "histogram": { "counts": [ 2, 2, 7, 8, 13, 37, 56, 111, 121, 179, 164, 129, 83, 42, 22, 11, 10, 2, 0, 1 ], "bin_edges": [ -0.2004147469997406, -0.1797032356262207, -0.158991739153862, -0.1382802277803421, -0.1175687238574028, -0.0968572199344635, -0.0761457085609436, -0.0554342120885849, -0.034722700715065, -0.014011189341545105, 0.006700307130813599, 0.027411818504333496, 0.048123329877853394, 0.06883484125137329, 0.0895463228225708, 0.1102578341960907, 0.1309693455696106, 0.1516808569431305, 0.1723923683166504, 0.1931038498878479, 0.213815376162529 ] } }, "transformer.layers.20.4.ff.2.bias": { "min": -0.1923648864030838, "max": 0.5813060998916626, "mean": -0.0005128913326188922, "std": 0.041049525141716, "abs_mean": 0.028079785406589508, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.3130457401275635, "elements": 1024, "histogram": { "counts": [ 3, 2, 11, 116, 410, 357, 81, 15, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.1923648864030838, -0.15368133783340454, -0.11499778926372528, -0.07631424069404602, -0.03763069212436676, 0.0010528564453125, 0.03973640501499176, 0.07841996848583221, 0.11710350215435028, 0.15578703582286835, 0.1944705992937088, 0.23315416276454926, 0.2718377113342285, 0.3105212450027466, 0.3492048382759094, 0.3878883719444275, 0.42657190561294556, 0.4652554392814636, 0.5039389729499817, 0.5426225662231445, 0.5813060998916626 ] } }, "transformer.layers.21.0.weight": { "min": -0.417529821395874, "max": 0.3719121813774109, "mean": 6.524643140437547e-06, "std": 0.021627992391586304, "abs_mean": 0.017142053693532944, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 31.3196964263916, "elements": 2097152, "histogram": { "counts": [ 1, 0, 2, 8, 12, 40, 68, 97, 154, 148, 133, 139, 86, 64, 23, 15, 4, 3, 1, 2 ], "bin_edges": [ -0.08539692312479019, -0.07676888257265091, -0.06814083456993103, -0.05951279401779175, -0.05088474974036217, -0.04225670546293259, -0.033628664910793304, -0.025000620633363724, -0.016372576355934143, -0.007744535803794861, 0.0008835121989250183, 0.0095115527510643, 0.018139593303203583, 0.026767641305923462, 0.035395681858062744, 0.04402372986078262, 0.052651770412921906, 0.06127981096506119, 0.06990785151720047, 0.07853590697050095, 0.08716394007205963 ] } }, "transformer.layers.21.1.g": { "min": 0.21460720896720886, "max": 0.7452309131622314, "mean": 0.6493626832962036, "std": 0.054172683507204056, "abs_mean": 0.6493626832962036, "sparsity": 0.0, "shape": [ 1024 ], "norm": 20.85171890258789, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 0, 0, 2, 4, 1, 4, 3, 6, 6, 20, 35, 57, 175, 270, 260, 126, 28 ], "bin_edges": [ 0.21460720896720886, 0.24113839864730835, 0.26766958832740784, 0.2942007780075073, 0.3207319378852844, 0.3472631275653839, 0.3737943172454834, 0.4003254771232605, 0.4268566966056824, 0.4533878564834595, 0.47991904616355896, 0.5064502358436584, 0.5329813957214355, 0.5595126152038574, 0.5860437750816345, 0.6125749349594116, 0.6391061544418335, 0.6656373739242554, 0.6921685338020325, 0.7186996936798096, 0.7452309131622314 ] } }, "transformer.layers.21.2.to_q.weight": { "min": -0.20914840698242188, "max": 0.19524669647216797, "mean": 4.0109844121616334e-05, "std": 0.03945964202284813, "abs_mean": 0.031188296154141426, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.406219482421875, "elements": 1048576, "histogram": { "counts": [ 2, 2, 14, 18, 29, 46, 68, 84, 95, 119, 139, 100, 97, 69, 53, 22, 20, 14, 7, 2 ], "bin_edges": [ -0.12001488357782364, -0.10817231237888336, -0.09632974117994308, -0.08448716998100281, -0.07264459878206253, -0.06080202758312225, -0.048959456384181976, -0.0371168851852417, -0.025274313986301422, -0.013431742787361145, -0.001589171588420868, 0.010253392159938812, 0.022095970809459686, 0.03393854945898056, 0.04578111320734024, 0.05762367695569992, 0.0694662556052208, 0.08130883425474167, 0.09315139800310135, 0.10499396175146103, 0.1168365478515625 ] } }, "transformer.layers.21.2.to_q.bias": { "min": -0.32907912135124207, "max": 0.25925326347351074, "mean": -0.003227418288588524, "std": 0.05623279884457588, "abs_mean": 0.039593495428562164, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.8015334606170654, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 1, 4, 10, 17, 29, 43, 111, 235, 293, 148, 56, 29, 14, 2, 5, 0, 2 ], "bin_edges": [ -0.32907912135124207, -0.2996625006198883, -0.27024587988853455, -0.2408292591571808, -0.21141263842582703, -0.18199601769447327, -0.1525793969631195, -0.12316277623176575, -0.09374615550041199, -0.06432953476905823, -0.03491291403770447, -0.005496293306350708, 0.023920327425003052, 0.05333694815635681, 0.08275356888771057, 0.11217018961906433, 0.1415868103504181, 0.17100343108177185, 0.2004200518131256, 0.22983667254447937, 0.25925326347351074 ] } }, "transformer.layers.21.2.to_k.weight": { "min": -0.20563212037086487, "max": 0.25434860587120056, "mean": 5.404070907388814e-05, "std": 0.038562316447496414, "abs_mean": 0.030502334237098694, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.487403869628906, "elements": 1048576, "histogram": { "counts": [ 4, 3, 21, 18, 41, 70, 91, 129, 118, 142, 115, 78, 58, 59, 24, 14, 10, 3, 1, 1 ], "bin_edges": [ -0.11593008786439896, -0.10307550430297852, -0.09022092819213867, -0.07736634463071823, -0.06451176106929779, -0.05165718495845795, -0.038802601397037506, -0.025948025286197662, -0.013093441724777222, -0.000238858163356781, 0.012615717947483063, 0.025470294058322906, 0.038324885070323944, 0.05117946118116379, 0.06403403729200363, 0.07688862830400467, 0.08974320441484451, 0.10259778052568436, 0.1154523715376854, 0.12830695509910583, 0.14116151630878448 ] } }, "transformer.layers.21.2.to_k.bias": { "min": -6.2339768409729, "max": 6.921723365783691, "mean": 0.04828859120607376, "std": 1.383695363998413, "abs_mean": 0.8800080418586731, "sparsity": 0.0, "shape": [ 1024 ], "norm": 44.28359603881836, "elements": 1024, "histogram": { "counts": [ 1, 3, 4, 8, 11, 11, 35, 68, 181, 348, 192, 63, 33, 14, 4, 6, 7, 4, 5, 2 ], "bin_edges": [ -6.2339768409729, -5.5761919021606445, -4.9184064865112305, -4.260621547698975, -3.6028366088867188, -2.945051670074463, -2.287266492843628, -1.629481315612793, -0.9716963768005371, -0.31391143798828125, 0.3438735008239746, 1.0016589164733887, 1.6594438552856445, 2.3172287940979004, 2.9750142097473145, 3.632798671722412, 4.290584087371826, 4.94836950302124, 5.606153964996338, 6.263939380645752, 6.921723365783691 ] } }, "transformer.layers.21.2.to_v.weight": { "min": -0.20957675576210022, "max": 0.23022468388080597, "mean": -4.7416378947673365e-06, "std": 0.04131784662604332, "abs_mean": 0.03262507542967796, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 42.30889129638672, "elements": 1048576, "histogram": { "counts": [ 6, 6, 17, 36, 57, 64, 116, 124, 140, 130, 100, 95, 49, 27, 14, 12, 4, 2, 0, 1 ], "bin_edges": [ -0.12215577065944672, -0.10773155093193054, -0.09330733865499496, -0.07888311892747879, -0.06445890665054321, -0.05003468692302704, -0.035610467195510864, -0.021186254918575287, -0.0067620351910591125, 0.007662177085876465, 0.02208639681339264, 0.036510616540908813, 0.05093483626842499, 0.06535905599594116, 0.07978326082229614, 0.09420748054981232, 0.10863170027732849, 0.12305592000484467, 0.13748012483119965, 0.15190435945987701, 0.1663285791873932 ] } }, "transformer.layers.21.2.to_v.bias": { "min": -0.043760623782873154, "max": 0.03593071922659874, "mean": -6.6086213337257504e-06, "std": 0.012794941663742065, "abs_mean": 0.01062579732388258, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.40923821926116943, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 6, 14, 28, 68, 87, 103, 90, 105, 103, 104, 91, 86, 54, 40, 12, 3, 3 ], "bin_edges": [ -0.043760623782873154, -0.03977605700492859, -0.035791490226984024, -0.03180692344903946, -0.027822354808449745, -0.02383778803050518, -0.019853219389915466, -0.0158686526119709, -0.011884085834026337, -0.007899519056081772, -0.003914952278137207, 6.961449980735779e-05, 0.004054185003042221, 0.008038751780986786, 0.01202331855893135, 0.016007885336875916, 0.01999245211482048, 0.023977022618055344, 0.02796158567070961, 0.03194615617394447, 0.03593071922659874 ] } }, "transformer.layers.21.2.to_out.0.weight": { "min": -0.3974460959434509, "max": 0.3449029326438904, "mean": -5.5259803048102185e-05, "std": 0.0423947237432003, "abs_mean": 0.03332170099020004, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 43.41182327270508, "elements": 1048576, "histogram": { "counts": [ 4, 10, 9, 26, 33, 67, 65, 109, 147, 115, 150, 104, 69, 49, 22, 8, 6, 3, 2, 2 ], "bin_edges": [ -0.130792036652565, -0.11647318303585052, -0.10215433686971664, -0.08783548325300217, -0.07351663708686829, -0.05919778347015381, -0.04487892985343933, -0.03056008368730545, -0.016241230070590973, -0.0019223839044570923, 0.012396469712257385, 0.026715323328971863, 0.04103417694568634, 0.05535303056240082, 0.0696718692779541, 0.08399072289466858, 0.09830957651138306, 0.11262843012809753, 0.12694726884365082, 0.1412661224603653, 0.15558499097824097 ] } }, "transformer.layers.21.2.to_out.0.bias": { "min": -0.055080167949199677, "max": 0.06271716207265854, "mean": 0.0003585012163966894, "std": 0.018664730712771416, "abs_mean": 0.01492932066321373, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.5970898866653442, "elements": 1024, "histogram": { "counts": [ 4, 3, 16, 28, 39, 53, 71, 106, 116, 132, 118, 107, 77, 57, 37, 19, 11, 4, 1, 1 ], "bin_edges": [ -0.055080167949199677, -0.049190301448106766, -0.043300434947013855, -0.037410568445920944, -0.03152070194482803, -0.025630835443735123, -0.019740968942642212, -0.013851102441549301, -0.00796123594045639, -0.0020713694393634796, 0.003818497061729431, 0.00970836728811264, 0.015598230063915253, 0.021488092839717865, 0.027377963066101074, 0.03326783329248428, 0.039157696068286896, 0.04504755884408951, 0.05093742907047272, 0.056827299296855927, 0.06271716207265854 ] } }, "transformer.layers.21.3.g": { "min": 0.3508152663707733, "max": 1.0430189371109009, "mean": 0.789574146270752, "std": 0.048565711826086044, "abs_mean": 0.789574146270752, "sparsity": 0.0, "shape": [ 1024 ], "norm": 25.314075469970703, "elements": 1024, "histogram": { "counts": [ 2, 1, 1, 2, 0, 1, 4, 3, 2, 8, 22, 141, 390, 360, 50, 6, 3, 1, 1, 2 ], "bin_edges": [ 0.3508152663707733, 0.3854254484176636, 0.42003563046455383, 0.4546458125114441, 0.48925599455833435, 0.5238661766052246, 0.5584763288497925, 0.5930865406990051, 0.6276967525482178, 0.6623069047927856, 0.6969170570373535, 0.7315272688865662, 0.7661374807357788, 0.8007476329803467, 0.8353577852249146, 0.869968056678772, 0.9045782089233398, 0.9391883611679077, 0.9737985134124756, 1.008408784866333, 1.0430189371109009 ] } }, "transformer.layers.21.4.ff.0.0.weight": { "min": -0.3336288034915924, "max": 0.38612979650497437, "mean": -0.00016904372023418546, "std": 0.041490498930215836, "abs_mean": 0.032748062163591385, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 84.96529388427734, "elements": 4194304, "histogram": { "counts": [ 6, 8, 14, 28, 56, 74, 111, 133, 137, 145, 99, 86, 49, 25, 13, 8, 2, 2, 1, 3 ], "bin_edges": [ -0.12654127180576324, -0.1117871031165123, -0.09703293442726135, -0.08227875828742981, -0.06752458959817886, -0.05277042090892792, -0.038016244769096375, -0.02326207607984543, -0.008507907390594482, 0.0062462687492370605, 0.02100042998790741, 0.03575460612773895, 0.050508782267570496, 0.06526294350624084, 0.08001711964607239, 0.09477128088474274, 0.10952545702457428, 0.12427963316440582, 0.13903380930423737, 0.15378795564174652, 0.16854214668273926 ] } }, "transformer.layers.21.4.ff.0.0.bias": { "min": -0.15726615488529205, "max": 0.05897233635187149, "mean": -0.031808022409677505, "std": 0.02507229521870613, "abs_mean": 0.03385263308882713, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.5919737815856934, "elements": 4096, "histogram": { "counts": [ 2, 2, 4, 8, 10, 13, 19, 65, 70, 132, 125, 138, 133, 98, 79, 53, 28, 14, 5, 2 ], "bin_edges": [ -0.12798060476779938, -0.11949160695075989, -0.1110026091337204, -0.10251360386610031, -0.09402460604906082, -0.08553560823202133, -0.07704660296440125, -0.06855760514736176, -0.060068607330322266, -0.051579609513282776, -0.043090611696243286, -0.0346016064286232, -0.02611260861158371, -0.01762361079454422, -0.009134605526924133, -0.0006456077098846436, 0.007843390107154846, 0.016332387924194336, 0.024821385741233826, 0.033310383558273315, 0.041799396276474 ] } }, "transformer.layers.21.4.ff.2.weight": { "min": -0.6961155533790588, "max": 0.4685930609703064, "mean": -8.521115523763001e-05, "std": 0.05180642008781433, "abs_mean": 0.0401139073073864, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 106.08828735351562, "elements": 4194304, "histogram": { "counts": [ 1, 3, 4, 9, 27, 46, 87, 141, 190, 184, 148, 84, 43, 20, 8, 2, 0, 1, 0, 2 ], "bin_edges": [ -0.21053080260753632, -0.18716660141944885, -0.16380241513252258, -0.14043821394443512, -0.11707401275634766, -0.09370981156826019, -0.07034562528133392, -0.04698142409324646, -0.023617222905158997, -0.0002530217170715332, 0.02311117947101593, 0.0464753657579422, 0.06983955204486847, 0.09320376813411713, 0.1165679544210434, 0.13993217051029205, 0.16329635679721832, 0.1866605430841446, 0.21002475917339325, 0.23338894546031952, 0.25675317645072937 ] } }, "transformer.layers.21.4.ff.2.bias": { "min": -0.24746476113796234, "max": 0.32834842801094055, "mean": -0.00026278701261617243, "std": 0.041423212736845016, "abs_mean": 0.030439719557762146, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.3249220848083496, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 2, 3, 21, 79, 228, 305, 227, 103, 20, 6, 0, 2, 1, 0, 0, 0, 2 ], "bin_edges": [ -0.24746476113796234, -0.21867410838603973, -0.18988344073295593, -0.16109278798103333, -0.13230213522911072, -0.10351146757602692, -0.07472081482410431, -0.04593014717102051, -0.0171394944190979, 0.011651173233985901, 0.04044182598590851, 0.06923247873783112, 0.09802313148975372, 0.12681378424167633, 0.15560446679592133, 0.18439511954784393, 0.21318577229976654, 0.24197642505168915, 0.27076709270477295, 0.29955774545669556, 0.32834842801094055 ] } }, "transformer.layers.22.0.weight": { "min": -0.2869253158569336, "max": 0.35028234124183655, "mean": -2.780619524855865e-06, "std": 0.02424117736518383, "abs_mean": 0.019041862338781357, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 35.103759765625, "elements": 2097152, "histogram": { "counts": [ 1, 0, 0, 0, 1, 1, 7, 13, 24, 73, 106, 207, 207, 171, 95, 56, 30, 4, 3, 1 ], "bin_edges": [ -0.14472992718219757, -0.13296078145503998, -0.12119164317846298, -0.10942250490188599, -0.0976533591747284, -0.0858842208981514, -0.0741150826215744, -0.06234593689441681, -0.05057679861783981, -0.03880766034126282, -0.027038514614105225, -0.015269368886947632, -0.003500238060951233, 0.00826890766620636, 0.020038053393363953, 0.03180718421936035, 0.043576329946517944, 0.05534547567367554, 0.06711460649967194, 0.07888375222682953, 0.09065289795398712 ] } }, "transformer.layers.22.1.g": { "min": 0.1968069076538086, "max": 0.7775169014930725, "mean": 0.6701230406761169, "std": 0.058515764772892, "abs_mean": 0.6701230406761169, "sparsity": 0.0, "shape": [ 1024 ], "norm": 21.525455474853516, "elements": 1024, "histogram": { "counts": [ 1, 2, 0, 0, 1, 1, 1, 0, 3, 9, 6, 5, 21, 35, 84, 153, 283, 270, 111, 14 ], "bin_edges": [ 0.1968069076538086, 0.2258424013853073, 0.25487789511680603, 0.28391340374946594, 0.31294891238212585, 0.3419843912124634, 0.3710198998451233, 0.4000554084777832, 0.4290909171104431, 0.45812639594078064, 0.48716190457344055, 0.5161974430084229, 0.545232892036438, 0.5742684006690979, 0.6033039093017578, 0.632339358329773, 0.6613749265670776, 0.6904103755950928, 0.7194458842277527, 0.7484813928604126, 0.7775169014930725 ] } }, "transformer.layers.22.2.to_q.weight": { "min": -0.2286878526210785, "max": 0.23117558658123016, "mean": -2.085552659991663e-05, "std": 0.04044000059366226, "abs_mean": 0.03183070570230484, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.409976959228516, "elements": 1048576, "histogram": { "counts": [ 1, 1, 6, 7, 13, 34, 52, 64, 94, 125, 135, 131, 117, 78, 60, 36, 26, 10, 4, 6 ], "bin_edges": [ -0.145659938454628, -0.1320401132106781, -0.1184203028678894, -0.10480047762393951, -0.09118065983057022, -0.07756084203720093, -0.06394101679325104, -0.050321198999881744, -0.03670138120651245, -0.023081563413143158, -0.009461745619773865, 0.004158079624176025, 0.017777904868125916, 0.03139771521091461, 0.0450175404548645, 0.0586373507976532, 0.07225717604160309, 0.08587700128555298, 0.09949681162834167, 0.11311663687229156, 0.12673646211624146 ] } }, "transformer.layers.22.2.to_q.bias": { "min": -0.2196890264749527, "max": 0.24058501422405243, "mean": 0.0007775035337544978, "std": 0.05580567941069603, "abs_mean": 0.04114898666739464, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.7850829362869263, "elements": 1024, "histogram": { "counts": [ 2, 2, 3, 10, 13, 18, 71, 103, 161, 207, 180, 105, 50, 39, 12, 9, 8, 2, 2, 3 ], "bin_edges": [ -0.2196890264749527, -0.19667533040046692, -0.17366161942481995, -0.15064792335033417, -0.1276342272758484, -0.10462051630020142, -0.08160682022571564, -0.058593109250068665, -0.035579413175582886, -0.012565717101097107, 0.010447993874549866, 0.03346170485019684, 0.05647538602352142, 0.0794890969991684, 0.10250280797481537, 0.12551648914813995, 0.14853020012378693, 0.1715439110994339, 0.19455759227275848, 0.21757130324840546, 0.24058501422405243 ] } }, "transformer.layers.22.2.to_k.weight": { "min": -0.21652470529079437, "max": 0.2261732518672943, "mean": -7.23175035091117e-05, "std": 0.03937419131398201, "abs_mean": 0.03104310855269432, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.318748474121094, "elements": 1048576, "histogram": { "counts": [ 3, 10, 18, 24, 40, 56, 86, 104, 108, 130, 96, 102, 97, 54, 37, 22, 7, 2, 2, 2 ], "bin_edges": [ -0.11781381815671921, -0.1056215912103653, -0.09342936426401138, -0.08123713731765747, -0.06904491037130356, -0.056852683424949646, -0.044660456478595734, -0.03246822953224182, -0.02027600258588791, -0.008083775639533997, 0.004108451306819916, 0.016300685703754425, 0.02849290519952774, 0.040685124695301056, 0.052877359092235565, 0.06506959348917007, 0.07726181298494339, 0.0894540324807167, 0.10164626687765121, 0.11383850127458572, 0.12603072822093964 ] } }, "transformer.layers.22.2.to_k.bias": { "min": -8.891955375671387, "max": 9.054566383361816, "mean": -0.0012135691940784454, "std": 1.846129059791565, "abs_mean": 1.065543293952942, "sparsity": 0.0, "shape": [ 1024 ], "norm": 59.04728698730469, "elements": 1024, "histogram": { "counts": [ 4, 3, 5, 10, 7, 16, 16, 42, 113, 464, 184, 56, 34, 10, 15, 6, 4, 7, 3, 1 ], "bin_edges": [ -8.464720726013184, -7.588756561279297, -6.712791919708252, -5.836827754974365, -4.96086311340332, -4.084898948669434, -3.208934783935547, -2.332970142364502, -1.4570059776306152, -0.5810418128967285, 0.2949228286743164, 1.1708869934082031, 2.04685115814209, 2.9228153228759766, 3.7987804412841797, 4.674744606018066, 5.550708770751953, 6.42667293548584, 7.302637100219727, 8.178601264953613, 9.054566383361816 ] } }, "transformer.layers.22.2.to_v.weight": { "min": -0.2690034806728363, "max": 0.25858405232429504, "mean": 4.355451528681442e-05, "std": 0.03841076418757439, "abs_mean": 0.030269136652350426, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.33214569091797, "elements": 1048576, "histogram": { "counts": [ 1, 2, 7, 7, 14, 24, 54, 84, 113, 124, 138, 148, 99, 74, 46, 35, 20, 8, 1, 1 ], "bin_edges": [ -0.133734330534935, -0.12091077864170074, -0.1080872192978859, -0.09526366740465164, -0.08244010806083679, -0.06961655616760254, -0.056793004274368286, -0.043969444930553436, -0.031145893037319183, -0.01832234114408493, -0.005498781800270081, 0.007324770092964172, 0.020148321986198425, 0.03297187387943268, 0.045795440673828125, 0.05861899256706238, 0.07144254446029663, 0.08426609635353088, 0.09708964824676514, 0.10991321504116058, 0.12273675948381424 ] } }, "transformer.layers.22.2.to_v.bias": { "min": -0.057884324342012405, "max": 0.05789237469434738, "mean": 0.0003543176280800253, "std": 0.014708762988448143, "abs_mean": 0.011816874146461487, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4705871641635895, "elements": 1024, "histogram": { "counts": [ 2, 1, 0, 6, 12, 23, 58, 108, 148, 138, 140, 128, 115, 80, 18, 11, 5, 4, 2, 1 ], "bin_edges": [ -0.057884324342012405, -0.05209548771381378, -0.04630665481090546, -0.04051781818270683, -0.03472898155450821, -0.028940148651599884, -0.02315131202340126, -0.017362479120492935, -0.011573642492294312, -0.005784805864095688, 4.027038812637329e-06, 0.0057928599417209625, 0.011581700295209885, 0.01737053319811821, 0.023159366101026535, 0.028948206454515457, 0.03473703935742378, 0.04052587226033211, 0.04631471261382103, 0.052103545516729355, 0.05789237469434738 ] } }, "transformer.layers.22.2.to_out.0.weight": { "min": -0.263511061668396, "max": 0.288027822971344, "mean": -6.177674367791042e-05, "std": 0.03907754644751549, "abs_mean": 0.030490010976791382, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 40.01486587524414, "elements": 1048576, "histogram": { "counts": [ 2, 5, 8, 12, 10, 35, 65, 103, 136, 154, 157, 130, 73, 45, 39, 8, 11, 3, 2, 2 ], "bin_edges": [ -0.14389725029468536, -0.1291889250278473, -0.11448058485984802, -0.09977225214242935, -0.08506391942501068, -0.07035558670759201, -0.05564725399017334, -0.04093892127275467, -0.026230588555336, -0.011522248387336731, 0.0031860768795013428, 0.017894402146339417, 0.032602742314338684, 0.04731108248233795, 0.062019407749176025, 0.0767277330160141, 0.09143607318401337, 0.10614441335201263, 0.1208527535200119, 0.13556106388568878, 0.15026940405368805 ] } }, "transformer.layers.22.2.to_out.0.bias": { "min": -0.044037725776433945, "max": 0.037295691668987274, "mean": -9.799870167626068e-05, "std": 0.013339235447347164, "abs_mean": 0.010626820847392082, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.426658570766449, "elements": 1024, "histogram": { "counts": [ 2, 2, 7, 8, 19, 33, 46, 71, 96, 128, 117, 111, 111, 83, 61, 50, 26, 12, 12, 5 ], "bin_edges": [ -0.044037725776433945, -0.03997105360031128, -0.03590438514947891, -0.03183771297335625, -0.02777104265987873, -0.023704372346401215, -0.01963770017027855, -0.015571029856801033, -0.011504359543323517, -0.0074376873672008514, -0.0033710189163684845, 0.0006956532597541809, 0.004762325435876846, 0.008828993886709213, 0.012895666062831879, 0.016962334513664246, 0.02102900668978691, 0.025095675140619278, 0.029162351042032242, 0.03322901949286461, 0.037295691668987274 ] } }, "transformer.layers.22.3.g": { "min": 0.339274525642395, "max": 1.0903433561325073, "mean": 0.8638954162597656, "std": 0.06374805420637131, "abs_mean": 0.8638954162597656, "sparsity": 0.0, "shape": [ 1024 ], "norm": 27.719741821289062, "elements": 1024, "histogram": { "counts": [ 1, 1, 0, 4, 5, 3, 1, 3, 4, 1, 10, 22, 70, 257, 471, 124, 12, 7, 2, 2 ], "bin_edges": [ 0.339274525642395, 0.3768279552459717, 0.4143814146518707, 0.4519348442554474, 0.48948830366134644, 0.5270417332649231, 0.5645951628684998, 0.6021486520767212, 0.6397020816802979, 0.6772555112838745, 0.7148089408874512, 0.7523623704910278, 0.7899158000946045, 0.8274692296981812, 0.8650227189064026, 0.9025761485099792, 0.9401295781135559, 0.9776830077171326, 1.0152363777160645, 1.0527899265289307, 1.0903433561325073 ] } }, "transformer.layers.22.4.ff.0.0.weight": { "min": -0.4230613112449646, "max": 0.41900894045829773, "mean": 0.00031366912298835814, "std": 0.043512988835573196, "abs_mean": 0.03395019471645355, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 89.10926055908203, "elements": 4194304, "histogram": { "counts": [ 1, 0, 3, 1, 9, 20, 43, 80, 112, 176, 161, 157, 96, 62, 44, 23, 8, 0, 3, 1 ], "bin_edges": [ -0.17678457498550415, -0.15967991948127747, -0.14257526397705078, -0.1254706084728241, -0.10836594551801682, -0.09126129001379013, -0.07415662705898285, -0.057051971554756165, -0.03994731605052948, -0.022842660546302795, -0.005738005042076111, 0.011366650462150574, 0.028471320867538452, 0.04557597637176514, 0.06268063187599182, 0.0797852873802185, 0.09688994288444519, 0.11399459838867188, 0.13109925389289856, 0.14820390939712524, 0.16530856490135193 ] } }, "transformer.layers.22.4.ff.0.0.bias": { "min": -0.21445079147815704, "max": 0.17045123875141144, "mean": -0.029427748173475266, "std": 0.03184095025062561, "abs_mean": 0.03383331745862961, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.774671792984009, "elements": 4096, "histogram": { "counts": [ 2, 0, 3, 3, 4, 7, 46, 96, 159, 227, 257, 147, 46, 1, 0, 1, 0, 0, 0, 1 ], "bin_edges": [ -0.21445079147815704, -0.1952056884765625, -0.17596058547496796, -0.1567154824733734, -0.13747039437294006, -0.11822528392076492, -0.09898018836975098, -0.07973508536815643, -0.06048998236656189, -0.041244879364967346, -0.021999776363372803, -0.0027546733617782593, 0.01649041473865509, 0.03573553264141083, 0.05498062074184418, 0.07422573864459991, 0.09347082674503326, 0.11271591484546661, 0.13196103274822235, 0.1512061208486557, 0.17045123875141144 ] } }, "transformer.layers.22.4.ff.2.weight": { "min": -0.5979012846946716, "max": 0.559224545955658, "mean": -0.00014804149395786226, "std": 0.053461432456970215, "abs_mean": 0.04055660963058472, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 109.4767837524414, "elements": 4194304, "histogram": { "counts": [ 2, 0, 0, 4, 3, 20, 39, 65, 102, 175, 205, 155, 102, 63, 35, 14, 11, 2, 0, 3 ], "bin_edges": [ -0.2224254310131073, -0.20098626613616943, -0.17954708635807037, -0.1581079214811325, -0.13666874170303345, -0.11522957682609558, -0.09379041194915771, -0.07235123217105865, -0.05091206729412079, -0.029472902417182922, -0.008033722639083862, 0.013405442237854004, 0.03484460711479187, 0.056283771991729736, 0.07772296667098999, 0.09916213154792786, 0.12060129642486572, 0.1420404613018036, 0.16347962617874146, 0.1849188208580017, 0.20635798573493958 ] } }, "transformer.layers.22.4.ff.2.bias": { "min": -0.17863567173480988, "max": 0.3767751455307007, "mean": 0.0013495876919478178, "std": 0.037288032472133636, "abs_mean": 0.027615398168563843, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.1934159994125366, "elements": 1024, "histogram": { "counts": [ 1, 1, 1, 20, 82, 253, 318, 221, 79, 16, 4, 1, 0, 1, 1, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.17863567173480988, -0.15086513757705688, -0.1230945885181427, -0.09532405436038971, -0.06755351275205612, -0.039782971143722534, -0.012012436985969543, 0.01575811207294464, 0.04352864623069763, 0.07129918038845062, 0.09906972944736481, 0.126840278506279, 0.1546107977628708, 0.18238134682178497, 0.21015189588069916, 0.23792241513729095, 0.26569294929504395, 0.2934635281562805, 0.3212340474128723, 0.3490046262741089, 0.3767751455307007 ] } }, "transformer.layers.23.0.weight": { "min": -0.3942491412162781, "max": 0.3687455952167511, "mean": 3.7661615351680666e-05, "std": 0.028617454692721367, "abs_mean": 0.021983902901411057, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 41.44097900390625, "elements": 2097152, "histogram": { "counts": [ 2, 0, 3, 6, 8, 23, 49, 78, 127, 146, 161, 161, 103, 55, 37, 24, 8, 4, 1, 4 ], "bin_edges": [ -0.10764998942613602, -0.09715531021356583, -0.08666063845157623, -0.07616595923900604, -0.06567128002643585, -0.05517660826444626, -0.04468192905187607, -0.034187257289886475, -0.023692578077316284, -0.013197898864746094, -0.0027032271027565002, 0.00779145210981369, 0.01828613132238388, 0.02878081053495407, 0.03927547484636307, 0.04977015405893326, 0.06026483327150345, 0.07075951248407364, 0.08125419169664383, 0.09174885600805283, 0.10224352777004242 ] } }, "transformer.layers.23.1.g": { "min": 0.2907008230686188, "max": 0.8258129358291626, "mean": 0.7054593563079834, "std": 0.06773429363965988, "abs_mean": 0.7054593563079834, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.678415298461914, "elements": 1024, "histogram": { "counts": [ 1, 0, 2, 2, 4, 7, 1, 9, 8, 16, 17, 22, 27, 52, 110, 185, 261, 219, 49, 8 ], "bin_edges": [ 0.2907008230686188, 0.3174564242362976, 0.34421202540397644, 0.3709676265716553, 0.3977232575416565, 0.4244788587093353, 0.45123445987701416, 0.4779900908470154, 0.5047456622123718, 0.531501293182373, 0.5582568645477295, 0.5850124955177307, 0.6117681264877319, 0.6385236978530884, 0.6652793288230896, 0.6920349597930908, 0.7187905311584473, 0.7455461025238037, 0.7723017334938049, 0.7990573644638062, 0.8258129358291626 ] } }, "transformer.layers.23.2.to_q.weight": { "min": -0.9265665411949158, "max": 1.0269814729690552, "mean": -2.791242877719924e-05, "std": 0.04764382541179657, "abs_mean": 0.03451031446456909, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 48.78652572631836, "elements": 1048576, "histogram": { "counts": [ 2, 5, 1, 2, 6, 9, 23, 52, 71, 111, 195, 192, 150, 95, 45, 19, 12, 5, 3, 2 ], "bin_edges": [ -0.21141943335533142, -0.19226078689098358, -0.17310214042663574, -0.1539434790611267, -0.13478483259677887, -0.11562618613243103, -0.0964675322175026, -0.07730887830257416, -0.05815023183822632, -0.03899158537387848, -0.01983293890953064, -0.0006742775440216064, 0.018484368920326233, 0.03764301538467407, 0.056801676750183105, 0.07596030831336975, 0.09511896967887878, 0.11427763104438782, 0.13343626260757446, 0.1525949239730835, 0.17175358533859253 ] } }, "transformer.layers.23.2.to_q.bias": { "min": -0.8779393434524536, "max": 0.8145599365234375, "mean": -0.0002924790605902672, "std": 0.09544122219085693, "abs_mean": 0.05420161783695221, "sparsity": 0.0, "shape": [ 1024 ], "norm": 3.0526418685913086, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 0, 1, 4, 7, 7, 31, 196, 594, 134, 12, 3, 5, 1, 3, 0, 0, 1 ], "bin_edges": [ -0.8779393434524536, -0.7933143973350525, -0.7086894512176514, -0.6240644454956055, -0.5394394993782043, -0.4548145532608032, -0.3701895475387573, -0.2855646014213562, -0.20093965530395508, -0.11631470918655396, -0.03168976306915283, 0.052935242652893066, 0.13756024837493896, 0.2221851348876953, 0.3068101406097412, 0.39143502712249756, 0.47606003284454346, 0.5606850385665894, 0.6453099250793457, 0.7299349308013916, 0.8145599365234375 ] } }, "transformer.layers.23.2.to_k.weight": { "min": -0.27007606625556946, "max": 0.24068056046962738, "mean": -2.2448431991506368e-05, "std": 0.038949914276599884, "abs_mean": 0.030576692894101143, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.88426208496094, "elements": 1048576, "histogram": { "counts": [ 7, 16, 12, 45, 55, 104, 128, 135, 131, 129, 76, 62, 52, 18, 16, 10, 2, 1, 0, 1 ], "bin_edges": [ -0.10851769894361496, -0.09493239969015121, -0.08134710043668747, -0.06776180118322372, -0.05417650192975998, -0.040591202676296234, -0.02700590342283249, -0.013420604169368744, 0.00016469508409500122, 0.013749994337558746, 0.02733529359102249, 0.04092059284448624, 0.05450589209794998, 0.06809119135141373, 0.08167649060487747, 0.09526178985834122, 0.10884708911180496, 0.12243238836526871, 0.13601768016815186, 0.1496029794216156, 0.16318829357624054 ] } }, "transformer.layers.23.2.to_k.bias": { "min": -23.705463409423828, "max": 22.81535530090332, "mean": -0.09178592264652252, "std": 4.064526081085205, "abs_mean": 1.7296252250671387, "sparsity": 0.0, "shape": [ 1024 ], "norm": 130.03448486328125, "elements": 1024, "histogram": { "counts": [ 4, 8, 5, 6, 5, 13, 10, 22, 235, 610, 32, 9, 5, 11, 7, 4, 4, 4, 3, 3 ], "bin_edges": [ -19.21725845336914, -17.11562728881836, -15.013997077941895, -12.91236686706543, -10.810735702514648, -8.709104537963867, -6.607474327087402, -4.5058441162109375, -2.4042129516601562, -0.302581787109375, 1.7990493774414062, 3.9006786346435547, 6.002309799194336, 8.103940963745117, 10.205570220947266, 12.307201385498047, 14.408832550048828, 16.51046371459961, 18.61209487915039, 20.713726043701172, 22.81535530090332 ] } }, "transformer.layers.23.2.to_v.weight": { "min": -0.2275296449661255, "max": 0.2455320507287979, "mean": -2.5536401153658517e-05, "std": 0.03864150494337082, "abs_mean": 0.030268080532550812, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 39.56838607788086, "elements": 1048576, "histogram": { "counts": [ 3, 2, 6, 13, 27, 41, 67, 108, 126, 126, 141, 112, 88, 59, 38, 18, 9, 7, 6, 3 ], "bin_edges": [ -0.1285347193479538, -0.11553225666284561, -0.10252979397773743, -0.08952733129262924, -0.07652486860752106, -0.06352240592241287, -0.05051994323730469, -0.0375174805521965, -0.024515017867088318, -0.011512555181980133, 0.0014899075031280518, 0.014492377638816833, 0.02749483287334442, 0.04049728810787201, 0.05349975824356079, 0.06650222837924957, 0.07950468361377716, 0.09250713884830475, 0.10550960898399353, 0.11851207911968231, 0.1315145343542099 ] } }, "transformer.layers.23.2.to_v.bias": { "min": -0.06007588282227516, "max": 0.045354753732681274, "mean": -0.00013596308417618275, "std": 0.014683394692838192, "abs_mean": 0.012035916559398174, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4696592688560486, "elements": 1024, "histogram": { "counts": [ 1, 0, 0, 1, 7, 15, 29, 66, 89, 133, 106, 132, 117, 118, 94, 57, 22, 7, 3, 3 ], "bin_edges": [ -0.06007588282227516, -0.054804351180791855, -0.04953281953930855, -0.04426128789782524, -0.038989756256341934, -0.03371822461485863, -0.02844669297337532, -0.023175161331892014, -0.017903629690408707, -0.0126320980489254, -0.007360566407442093, -0.002089034765958786, 0.003182496875524521, 0.008454028517007828, 0.013725560158491135, 0.01899709179997444, 0.02426862344145775, 0.029540155082941055, 0.03481168672442436, 0.04008321836590767, 0.045354753732681274 ] } }, "transformer.layers.23.2.to_out.0.weight": { "min": -0.33782336115837097, "max": 0.3746013939380646, "mean": 7.420163456117734e-06, "std": 0.04082043468952179, "abs_mean": 0.03092447854578495, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 41.79957580566406, "elements": 1048576, "histogram": { "counts": [ 2, 3, 3, 5, 19, 26, 57, 100, 158, 164, 198, 138, 56, 43, 15, 6, 1, 3, 2, 1 ], "bin_edges": [ -0.1686936765909195, -0.1513969749212265, -0.13410025835037231, -0.11680355668067932, -0.09950684756040573, -0.08221013844013214, -0.06491343677043915, -0.04761672765016556, -0.030320018529891968, -0.013023316860198975, 0.004273399710655212, 0.021570101380348206, 0.0388668030500412, 0.056163519620895386, 0.07346022129058838, 0.09075693786144257, 0.10805363953113556, 0.12535034120082855, 0.14264704287052155, 0.15994377434253693, 0.1772404909133911 ] } }, "transformer.layers.23.2.to_out.0.bias": { "min": -0.046125710010528564, "max": 0.19506430625915527, "mean": 0.0002738517359830439, "std": 0.013541821390390396, "abs_mean": 0.009806378744542599, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.43321529030799866, "elements": 1024, "histogram": { "counts": [ 5, 34, 168, 349, 326, 103, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.046125710010528564, -0.03406620770692825, -0.02200670912861824, -0.009947210550308228, 0.0021122917532920837, 0.014171794056892395, 0.02623128890991211, 0.03829079121351242, 0.05035029351711273, 0.06240979582071304, 0.07446929812431335, 0.08652879297733307, 0.09858828783035278, 0.11064779758453369, 0.1227072924375534, 0.13476680219173431, 0.14682629704475403, 0.15888579189777374, 0.17094530165195465, 0.18300479650497437, 0.19506430625915527 ] } }, "transformer.layers.23.3.g": { "min": 0.373764306306839, "max": 1.1280238628387451, "mean": 0.8901123404502869, "std": 0.06384868174791336, "abs_mean": 0.8901123404502869, "sparsity": 0.0, "shape": [ 1024 ], "norm": 28.556705474853516, "elements": 1024, "histogram": { "counts": [ 2, 0, 0, 0, 3, 4, 3, 6, 9, 14, 15, 26, 96, 327, 377, 101, 12, 1, 3, 1 ], "bin_edges": [ 0.373764306306839, 0.41147729754447937, 0.44919025897979736, 0.48690325021743774, 0.5246162414550781, 0.5623291730880737, 0.6000422239303589, 0.6377551555633545, 0.6754681468009949, 0.7131811380386353, 0.7508940696716309, 0.788607120513916, 0.8263200521469116, 0.864033043384552, 0.9017460346221924, 0.939458966255188, 0.9771720170974731, 1.0148849487304688, 1.052597999572754, 1.0903109312057495, 1.1280238628387451 ] } }, "transformer.layers.23.4.ff.0.0.weight": { "min": -0.44741326570510864, "max": 0.5422499775886536, "mean": 2.5218110749847256e-05, "std": 0.045580700039863586, "abs_mean": 0.03508353605866432, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 93.34017944335938, "elements": 4194304, "histogram": { "counts": [ 1, 2, 0, 2, 17, 56, 146, 252, 261, 164, 73, 20, 5, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.23515397310256958, -0.20598602294921875, -0.17681807279586792, -0.1476501226425171, -0.11848217248916626, -0.08931422233581543, -0.0601462721824646, -0.03097832202911377, -0.0018103718757629395, 0.02735757827758789, 0.05652552843093872, 0.08569347858428955, 0.11486142873764038, 0.1440293788909912, 0.17319732904434204, 0.20236527919769287, 0.2315332293510437, 0.26070117950439453, 0.28986912965774536, 0.3190370798110962, 0.34820500016212463 ] } }, "transformer.layers.23.4.ff.0.0.bias": { "min": -0.22342835366725922, "max": 0.08723597973585129, "mean": -0.03199537843465805, "std": 0.03770318627357483, "abs_mean": 0.03751157969236374, "sparsity": 0.0, "shape": [ 4096 ], "norm": 3.1645314693450928, "elements": 4096, "histogram": { "counts": [ 1, 2, 4, 2, 5, 5, 8, 9, 14, 30, 64, 111, 129, 143, 153, 162, 89, 53, 14, 2 ], "bin_edges": [ -0.21711575984954834, -0.20336702466011047, -0.1896182745695114, -0.17586952447891235, -0.1621207892894745, -0.14837205410003662, -0.13462330400943756, -0.1208745613694191, -0.10712581872940063, -0.09337707608938217, -0.07962833344936371, -0.06587958335876465, -0.05213084816932678, -0.038382112979888916, -0.024633362889289856, -0.010884612798690796, 0.0028641223907470703, 0.016612857580184937, 0.030361607670783997, 0.04411035776138306, 0.057859089225530624 ] } }, "transformer.layers.23.4.ff.2.weight": { "min": -0.7254156470298767, "max": 0.6879446506500244, "mean": 3.628328340710141e-05, "std": 0.05179440602660179, "abs_mean": 0.03891872242093086, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 106.06195068359375, "elements": 4194304, "histogram": { "counts": [ 4, 3, 10, 28, 65, 177, 270, 222, 131, 50, 24, 8, 4, 3, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.20179718732833862, -0.17253395915031433, -0.14327071607112885, -0.11400748789310455, -0.08474425226449966, -0.055481016635894775, -0.026217788457870483, 0.0030454546213150024, 0.032308682799339294, 0.06157192587852478, 0.09083515405654907, 0.12009838223457336, 0.14936161041259766, 0.17862483859062195, 0.20788809657096863, 0.23715132474899292, 0.2664145529270172, 0.2956777811050415, 0.3249410390853882, 0.3542042374610901, 0.3834674656391144 ] } }, "transformer.layers.23.4.ff.2.bias": { "min": -0.174102783203125, "max": 0.2178839147090912, "mean": 3.535003634169698e-05, "std": 0.03175075352191925, "abs_mean": 0.023612717166543007, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.0155285596847534, "elements": 1024, "histogram": { "counts": [ 1, 1, 2, 3, 2, 20, 64, 183, 253, 268, 131, 48, 16, 4, 0, 2, 0, 1, 0, 1 ], "bin_edges": [ -0.174102783203125, -0.1545034497976303, -0.13490411639213562, -0.11530477553606033, -0.09570544213056564, -0.07610610872507095, -0.056506767868995667, -0.03690743446350098, -0.017308101058006287, 0.0022912323474884033, 0.021890565752983093, 0.04148989915847778, 0.06108924746513367, 0.08068856596946716, 0.10028791427612305, 0.11988723278045654, 0.13948658108711243, 0.1590859293937683, 0.1786852478981018, 0.1982845962047577, 0.2178839147090912 ] } }, "transformer.layers.24.0.weight": { "min": -0.33916032314300537, "max": 0.37271323800086975, "mean": 4.308380448492244e-05, "std": 0.034135378897190094, "abs_mean": 0.025202713906764984, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 49.43122863769531, "elements": 2097152, "histogram": { "counts": [ 3, 3, 3, 11, 20, 36, 73, 135, 221, 198, 138, 82, 35, 20, 10, 5, 1, 2, 1, 3 ], "bin_edges": [ -0.13991793990135193, -0.12436307221651077, -0.10880820453166962, -0.09325334429740906, -0.0776984766125679, -0.062143608927726746, -0.046588748693466187, -0.03103388100862503, -0.015479013323783875, 7.584691047668457e-05, 0.015630722045898438, 0.031185582280158997, 0.046740442514419556, 0.06229531764984131, 0.07785017788410187, 0.09340505301952362, 0.10895991325378418, 0.12451478838920593, 0.1400696337223053, 0.15562450885772705, 0.1711793839931488 ] } }, "transformer.layers.24.1.g": { "min": 0.3176645338535309, "max": 1.2846463918685913, "mean": 0.6014195084571838, "std": 0.08323279023170471, "abs_mean": 0.6014195084571838, "sparsity": 0.0, "shape": [ 1024 ], "norm": 19.42867660522461, "elements": 1024, "histogram": { "counts": [ 5, 19, 36, 45, 96, 322, 316, 112, 29, 5, 4, 2, 2, 2, 1, 1, 1, 1, 0, 1 ], "bin_edges": [ 0.3176645338535309, 0.36601361632347107, 0.41436272859573364, 0.46271181106567383, 0.511060893535614, 0.5594099760055542, 0.6077591180801392, 0.6561082005500793, 0.7044572830200195, 0.7528063654899597, 0.8011554479598999, 0.8495045900344849, 0.8978536128997803, 0.9462027549743652, 0.9945518970489502, 1.0429009199142456, 1.0912500619888306, 1.139599084854126, 1.187948226928711, 1.236297369003296, 1.2846463918685913 ] } }, "transformer.layers.24.2.to_q.weight": { "min": -0.2829808294773102, "max": 0.26017650961875916, "mean": -3.0644375783595024e-06, "std": 0.035980723798274994, "abs_mean": 0.027858424931764603, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 36.84376525878906, "elements": 1048576, "histogram": { "counts": [ 2, 1, 3, 9, 14, 32, 59, 84, 121, 163, 175, 128, 100, 48, 34, 14, 5, 3, 2, 3 ], "bin_edges": [ -0.135683074593544, -0.12228070199489594, -0.10887832939624786, -0.09547595679759979, -0.08207358419895172, -0.06867121160030365, -0.05526883900165558, -0.04186646640300751, -0.028464093804359436, -0.015061721205711365, -0.0016593486070632935, 0.011743023991584778, 0.02514539659023285, 0.03854776918888092, 0.05195014178752899, 0.06535251438617706, 0.07875488698482513, 0.0921572595834732, 0.10555963218212128, 0.11896200478076935, 0.13236436247825623 ] } }, "transformer.layers.24.2.to_q.bias": { "min": -0.23540745675563812, "max": 0.20547473430633545, "mean": 0.0002399118966422975, "std": 0.056001532822847366, "abs_mean": 0.043137334287166595, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.791190266609192, "elements": 1024, "histogram": { "counts": [ 1, 1, 2, 2, 12, 19, 32, 67, 101, 146, 172, 164, 112, 80, 42, 28, 11, 6, 1, 1 ], "bin_edges": [ -0.23540745675563812, -0.21336334943771362, -0.19131922721862793, -0.16927511990070343, -0.14723101258277893, -0.12518690526485443, -0.10314278304576874, -0.08109867572784424, -0.05905456840991974, -0.03701046109199524, -0.01496635377407074, 0.007077768445014954, 0.029121890664100647, 0.05116598308086395, 0.07321010529994965, 0.09525419771671295, 0.11729831993579865, 0.13934244215488434, 0.16138653457164764, 0.18343065679073334, 0.20547473430633545 ] } }, "transformer.layers.24.2.to_k.weight": { "min": -0.43518391251564026, "max": 0.32444700598716736, "mean": 2.422756006126292e-05, "std": 0.03412417694926262, "abs_mean": 0.026380151510238647, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 34.94275665283203, "elements": 1048576, "histogram": { "counts": [ 2, 2, 10, 24, 47, 63, 120, 180, 193, 168, 93, 50, 22, 16, 2, 5, 2, 0, 0, 1 ], "bin_edges": [ -0.11856266856193542, -0.10398302227258682, -0.08940338343381882, -0.07482373714447021, -0.06024409458041191, -0.04566445201635361, -0.031084805727005005, -0.016505166888237, -0.0019255205988883972, 0.012654125690460205, 0.02723376452922821, 0.041813403367996216, 0.056393057107925415, 0.07097269594669342, 0.08555233478546143, 0.10013198852539062, 0.11471162736415863, 0.12929126620292664, 0.14387091994285583, 0.15845054388046265, 0.17303019762039185 ] } }, "transformer.layers.24.2.to_k.bias": { "min": -5.537700176239014, "max": 7.30228853225708, "mean": -0.007349951192736626, "std": 0.6983441114425659, "abs_mean": 0.2657066583633423, "sparsity": 0.0, "shape": [ 1024 ], "norm": 22.33733558654785, "elements": 1024, "histogram": { "counts": [ 1, 2, 2, 3, 8, 9, 5, 23, 847, 71, 12, 8, 3, 3, 0, 0, 0, 1, 0, 2 ], "bin_edges": [ -5.537700176239014, -4.895700931549072, -4.253701210021973, -3.6117019653320312, -2.9697024822235107, -2.3277029991149902, -1.6857037544250488, -1.0437040328979492, -0.4017047882080078, 0.2402944564819336, 0.8822941780090332, 1.5242934226989746, 2.166292667388916, 2.8082919120788574, 3.4502921104431152, 4.092291355133057, 4.734290599822998, 5.3762898445129395, 6.018289089202881, 6.660289287567139, 7.30228853225708 ] } }, "transformer.layers.24.2.to_v.weight": { "min": -0.34386035799980164, "max": 0.3621582090854645, "mean": 0.00010323335300199687, "std": 0.04783642664551735, "abs_mean": 0.03668797016143799, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 48.983909606933594, "elements": 1048576, "histogram": { "counts": [ 3, 4, 1, 8, 15, 35, 56, 107, 168, 195, 164, 102, 70, 37, 14, 15, 3, 1, 1, 1 ], "bin_edges": [ -0.18854962289333344, -0.16854910552501678, -0.14854860305786133, -0.12854808568954468, -0.10854757577180862, -0.08854706585407257, -0.06854654848575592, -0.048546046018600464, -0.028545528650283813, -0.008545011281967163, 0.011455491185188293, 0.031456008553504944, 0.051456525921821594, 0.07145704329013824, 0.09145753085613251, 0.11145804822444916, 0.1314585655927658, 0.15145908296108246, 0.1714596003293991, 0.19146008789539337, 0.21146059036254883 ] } }, "transformer.layers.24.2.to_v.bias": { "min": -0.07365774363279343, "max": 0.060269735753536224, "mean": 0.0009362755226902664, "std": 0.014931198209524155, "abs_mean": 0.012213543988764286, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.47850388288497925, "elements": 1024, "histogram": { "counts": [ 1, 2, 2, 0, 0, 1, 7, 52, 124, 144, 140, 145, 164, 135, 60, 13, 4, 3, 0, 3 ], "bin_edges": [ -0.07365774363279343, -0.066961370408535, -0.06026499718427658, -0.05356862023472786, -0.04687224701046944, -0.040175873786211014, -0.03347949683666229, -0.02678312361240387, -0.020086750388145447, -0.013390377163887024, -0.006694003939628601, 2.3692846298217773e-06, 0.0066987499594688416, 0.013395123183727264, 0.020091496407985687, 0.02678786963224411, 0.03348424285650253, 0.040180616080760956, 0.04687698930501938, 0.0535733625292778, 0.060269735753536224 ] } }, "transformer.layers.24.2.to_out.0.weight": { "min": -0.2561565041542053, "max": 0.2865042984485626, "mean": 4.9739428504835814e-06, "std": 0.04156460985541344, "abs_mean": 0.03219493851065636, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 42.561676025390625, "elements": 1048576, "histogram": { "counts": [ 4, 2, 9, 11, 24, 43, 56, 77, 109, 123, 150, 116, 94, 72, 41, 43, 12, 7, 5, 2 ], "bin_edges": [ -0.12750108540058136, -0.11490428447723389, -0.10230748355388641, -0.08971068263053894, -0.07711388170719147, -0.064517080783844, -0.05192027986049652, -0.03932347893714905, -0.026726678013801575, -0.014129877090454102, -0.0015330761671066284, 0.011063724756240845, 0.023660525679588318, 0.03625732660293579, 0.048854127526283264, 0.06145092844963074, 0.07404772937297821, 0.08664453029632568, 0.09924133121967316, 0.11183813214302063, 0.12443491816520691 ] } }, "transformer.layers.24.2.to_out.0.bias": { "min": -0.055231235921382904, "max": 0.06271004676818848, "mean": 0.00012724015687126666, "std": 0.0071450709365308285, "abs_mean": 0.0050282771699130535, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.22856685519218445, "elements": 1024, "histogram": { "counts": [ 1, 1, 0, 1, 2, 3, 10, 61, 279, 367, 219, 45, 7, 1, 2, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.055231235921382904, -0.049334172159433365, -0.043437108397483826, -0.03754004091024399, -0.03164298087358475, -0.02574591524899006, -0.01984884962439537, -0.013951785862445831, -0.008054722100496292, -0.002157658338546753, 0.0037394054234027863, 0.009636469185352325, 0.015533536672592163, 0.021430596709251404, 0.02732766419649124, 0.03322472423315048, 0.03912179172039032, 0.04501885920763016, 0.0509159192442894, 0.056812986731529236, 0.06271004676818848 ] } }, "transformer.layers.24.3.g": { "min": 0.49412763118743896, "max": 1.2182179689407349, "mean": 1.0133787393569946, "std": 0.11725164949893951, "abs_mean": 1.0133787393569946, "sparsity": 0.0, "shape": [ 1024 ], "norm": 32.644248962402344, "elements": 1024, "histogram": { "counts": [ 4, 1, 1, 4, 4, 8, 16, 23, 25, 34, 38, 35, 44, 85, 136, 190, 200, 101, 43, 8 ], "bin_edges": [ 0.49412763118743896, 0.5303321480751038, 0.5665366649627686, 0.6027411818504333, 0.6389456987380981, 0.6751502156257629, 0.7113547325134277, 0.7475592494010925, 0.7837637662887573, 0.8199682831764221, 0.8561728000640869, 0.8923773169517517, 0.9285818338394165, 0.9647863507270813, 1.000990867614746, 1.0371954441070557, 1.0733999013900757, 1.1096043586730957, 1.1458089351654053, 1.1820135116577148, 1.2182179689407349 ] } }, "transformer.layers.24.4.ff.0.0.weight": { "min": -1.0939558744430542, "max": 1.0474863052368164, "mean": -4.8846173740457743e-05, "std": 0.052417904138565063, "abs_mean": 0.03914271295070648, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 107.33837127685547, "elements": 4194304, "histogram": { "counts": [ 1, 2, 0, 0, 2, 3, 6, 13, 41, 76, 113, 190, 179, 154, 120, 62, 22, 10, 1, 5 ], "bin_edges": [ -0.27871084213256836, -0.2560090720653534, -0.23330731689929962, -0.21060556173324585, -0.18790379166603088, -0.1652020364999771, -0.14250028133392334, -0.11979851126670837, -0.0970967561006546, -0.07439500093460083, -0.051693230867385864, -0.028991475701332092, -0.00628972053527832, 0.016412049531936646, 0.03911381959915161, 0.06181555986404419, 0.08451732993125916, 0.10721909999847412, 0.1299208402633667, 0.15262261033058167, 0.17532438039779663 ] } }, "transformer.layers.24.4.ff.0.0.bias": { "min": -0.22328178584575653, "max": 0.172784686088562, "mean": -0.02721056528389454, "std": 0.0362662672996521, "abs_mean": 0.03248982131481171, "sparsity": 0.0, "shape": [ 4096 ], "norm": 2.9014923572540283, "elements": 4096, "histogram": { "counts": [ 1, 4, 2, 2, 1, 9, 4, 23, 19, 29, 48, 109, 177, 239, 193, 95, 29, 11, 2, 3 ], "bin_edges": [ -0.22328178584575653, -0.20809650421142578, -0.19291120767593384, -0.1777259260416031, -0.16254064440727234, -0.1473553478717804, -0.13217006623744965, -0.1169847846031189, -0.10179949551820755, -0.0866142064332962, -0.07142892479896545, -0.056243643164634705, -0.04105834662914276, -0.02587306499481201, -0.010687783360481262, 0.004497513175010681, 0.01968279480934143, 0.03486807644367218, 0.050053372979164124, 0.06523863971233368, 0.08042393624782562 ] } }, "transformer.layers.24.4.ff.2.weight": { "min": -0.8832080960273743, "max": 0.9217195510864258, "mean": -0.00014604278840124607, "std": 0.05329865962266922, "abs_mean": 0.03896614536643028, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 109.14219665527344, "elements": 4194304, "histogram": { "counts": [ 1, 2, 4, 7, 22, 43, 97, 163, 233, 197, 101, 63, 35, 14, 8, 5, 4, 0, 0, 1 ], "bin_edges": [ -0.20571710169315338, -0.18189872801303864, -0.15808036923408508, -0.13426199555397034, -0.11044362187385559, -0.08662524819374084, -0.06280688941478729, -0.038988515734672546, -0.0151701420545578, 0.008648231625556946, 0.03246660530567169, 0.05628497898578644, 0.0801033228635788, 0.10392169654369354, 0.1277400702238083, 0.15155844390392303, 0.17537681758403778, 0.19919519126415253, 0.22301356494426727, 0.24683193862438202, 0.27065032720565796 ] } }, "transformer.layers.24.4.ff.2.bias": { "min": -0.1707809567451477, "max": 0.3790228068828583, "mean": 0.003364440519362688, "std": 0.03984135016798973, "abs_mean": 0.02741703949868679, "sparsity": 0.0, "shape": [ 1024 ], "norm": 1.27884042263031, "elements": 1024, "histogram": { "counts": [ 3, 0, 6, 23, 106, 261, 336, 194, 47, 8, 9, 0, 1, 2, 2, 0, 0, 1, 0, 1 ], "bin_edges": [ -0.1707809567451477, -0.14329077303409576, -0.11580058187246323, -0.08831039071083069, -0.06082020699977875, -0.03333002328872681, -0.005839824676513672, 0.02165035903453827, 0.04914054274559021, 0.07663072645664215, 0.10412091016769409, 0.13161110877990723, 0.15910130739212036, 0.1865914762020111, 0.21408167481422424, 0.241571843624115, 0.2690620422363281, 0.29655224084854126, 0.324042409658432, 0.35153257846832275, 0.3790228068828583 ] } }, "transformer.layers.25.0.weight": { "min": -0.7773804068565369, "max": 0.7221406698226929, "mean": 1.8065227777697146e-05, "std": 0.04615423083305359, "abs_mean": 0.03191829100251198, "sparsity": 0.0, "shape": [ 1024, 2048 ], "norm": 66.83527374267578, "elements": 2097152, "histogram": { "counts": [ 2, 1, 1, 1, 2, 4, 5, 18, 47, 124, 255, 281, 150, 51, 28, 15, 10, 3, 1, 1 ], "bin_edges": [ -0.28086602687835693, -0.2556462585926056, -0.23042649030685425, -0.2052067071199417, -0.17998693883419037, -0.15476717054843903, -0.1295473873615265, -0.10432761907577515, -0.0791078507900238, -0.05388808250427246, -0.028668314218521118, -0.0034485459327697754, 0.021771252155303955, 0.0469910204410553, 0.07221078872680664, 0.09743055701255798, 0.12265032529830933, 0.14787009358406067, 0.173089861869812, 0.19830963015556335, 0.2235294133424759 ] } }, "transformer.layers.25.1.g": { "min": 0.33866187930107117, "max": 1.425328254699707, "mean": 0.9481796622276306, "std": 0.20640140771865845, "abs_mean": 0.9481796622276306, "sparsity": 0.0, "shape": [ 1024 ], "norm": 31.051620483398438, "elements": 1024, "histogram": { "counts": [ 5, 9, 9, 26, 44, 41, 29, 40, 48, 61, 72, 91, 134, 120, 135, 83, 44, 6, 2, 1 ], "bin_edges": [ 0.33866187930107117, 0.3929952085018158, 0.44732850790023804, 0.5016618371009827, 0.5559951663017273, 0.6103284358978271, 0.6646617650985718, 0.7189950942993164, 0.773328423500061, 0.8276617527008057, 0.8819950819015503, 0.9363284111022949, 0.9906617403030396, 1.0449950695037842, 1.0993282794952393, 1.1536616086959839, 1.2079949378967285, 1.2623282670974731, 1.3166615962982178, 1.3709949254989624, 1.425328254699707 ] } }, "transformer.layers.25.2.to_q.weight": { "min": -1.7458044290542603, "max": 1.704500436782837, "mean": 0.00022708994220010936, "std": 0.15870554745197296, "abs_mean": 0.0743621215224266, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 162.51336669921875, "elements": 1048576, "histogram": { "counts": [ 2, 4, 6, 6, 9, 11, 14, 150, 679, 56, 13, 20, 12, 8, 3, 2, 1, 0, 2, 2 ], "bin_edges": [ -0.9227117896080017, -0.81264328956604, -0.7025748491287231, -0.5925063490867615, -0.4824378490447998, -0.37236934900283813, -0.26230090856552124, -0.15223240852355957, -0.0421639084815979, 0.06790459156036377, 0.17797309160232544, 0.28804153203964233, 0.39810997247695923, 0.5081785321235657, 0.6182469725608826, 0.728315532207489, 0.8383839726448059, 0.9484524130821228, 1.058521032333374, 1.1685893535614014, 1.2786579132080078 ] } }, "transformer.layers.25.2.to_q.bias": { "min": -1.19757080078125, "max": 1.0991984605789185, "mean": -0.009535851888358593, "std": 0.2035919725894928, "abs_mean": 0.08614380657672882, "sparsity": 0.0, "shape": [ 1024 ], "norm": 6.518906593322754, "elements": 1024, "histogram": { "counts": [ 2, 1, 6, 6, 8, 15, 8, 9, 12, 66, 780, 35, 9, 10, 12, 9, 5, 2, 1, 4 ], "bin_edges": [ -1.19757080078125, -1.0827323198318481, -0.9678938984870911, -0.8530554175376892, -0.7382169961929321, -0.6233785152435303, -0.5085400342941284, -0.39370161294937134, -0.2788631319999695, -0.1640247106552124, -0.04918622970581055, 0.06565225124359131, 0.18049073219299316, 0.295329213142395, 0.4101675748825073, 0.5250060558319092, 0.639844536781311, 0.7546830177307129, 0.8695213794708252, 0.9843599796295166, 1.0991984605789185 ] } }, "transformer.layers.25.2.to_k.weight": { "min": -0.4207988381385803, "max": 0.4279989004135132, "mean": 6.386132736224681e-05, "std": 0.04802023991942406, "abs_mean": 0.03564126417040825, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 49.1722412109375, "elements": 1048576, "histogram": { "counts": [ 1, 1, 1, 3, 2, 8, 19, 11, 36, 96, 134, 196, 213, 135, 78, 38, 11, 11, 2, 4 ], "bin_edges": [ -0.24734075367450714, -0.22657868266105652, -0.2058166116476059, -0.18505454063415527, -0.16429248452186584, -0.14353039860725403, -0.1227683424949646, -0.10200627148151398, -0.08124420046806335, -0.06048212945461273, -0.03972005844116211, -0.018957987427711487, 0.001804068684577942, 0.02256615459918976, 0.04332821071147919, 0.064090296626091, 0.08485235273838043, 0.10561440885066986, 0.12637649476528168, 0.1471385508775711, 0.16790063679218292 ] } }, "transformer.layers.25.2.to_k.bias": { "min": -19.71625328063965, "max": 19.51169776916504, "mean": -0.24800625443458557, "std": 4.769559860229492, "abs_mean": 1.9953224658966064, "sparsity": 0.0, "shape": [ 1024 ], "norm": 152.75767517089844, "elements": 1024, "histogram": { "counts": [ 7, 7, 15, 13, 9, 12, 5, 1, 7, 355, 506, 9, 0, 1, 9, 8, 14, 8, 8, 6 ], "bin_edges": [ -19.71625328063965, -17.75485610961914, -15.793457984924316, -13.832060813903809, -11.870662689208984, -9.909265518188477, -7.947868347167969, -5.9864702224731445, -4.025073051452637, -2.0636749267578125, -0.10227775573730469, 1.8591194152832031, 3.820516586303711, 5.781913757324219, 7.743312835693359, 9.704710006713867, 11.666107177734375, 13.627504348754883, 15.588903427124023, 17.5502986907959, 19.51169776916504 ] } }, "transformer.layers.25.2.to_v.weight": { "min": -0.3236338496208191, "max": 0.438272625207901, "mean": -1.1853735486511141e-05, "std": 0.04616710543632507, "abs_mean": 0.03471195697784424, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 47.274478912353516, "elements": 1048576, "histogram": { "counts": [ 1, 2, 7, 8, 26, 35, 69, 148, 207, 219, 115, 78, 48, 18, 11, 5, 0, 1, 0, 2 ], "bin_edges": [ -0.17851462960243225, -0.15869757533073425, -0.13888052105903625, -0.11906348168849945, -0.09924642741680145, -0.07942937314510345, -0.05961232632398605, -0.03979527950286865, -0.019978225231170654, -0.00016117095947265625, 0.019655883312225342, 0.039472922682762146, 0.059289976954460144, 0.07910701632499695, 0.09892407059669495, 0.11874112486839294, 0.13855817914009094, 0.15837523341178894, 0.17819228768348694, 0.19800934195518494, 0.21782641112804413 ] } }, "transformer.layers.25.2.to_v.bias": { "min": -0.03371698036789894, "max": 0.03678824380040169, "mean": 0.0006397695397026837, "std": 0.0129077835008502, "abs_mean": 0.010960067622363567, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.41335463523864746, "elements": 1024, "histogram": { "counts": [ 4, 6, 12, 33, 52, 72, 78, 88, 88, 86, 71, 93, 76, 102, 84, 38, 5, 7, 4, 1 ], "bin_edges": [ -0.03371698036789894, -0.03019171953201294, -0.026666458696126938, -0.023141195997595787, -0.019615935161709785, -0.016090674325823784, -0.012565411627292633, -0.009040150791406631, -0.00551488995552063, -0.0019896291196346283, 0.0015356317162513733, 0.005060892552137375, 0.008586157113313675, 0.012111417949199677, 0.015636678785085678, 0.01916193962097168, 0.02268720045685768, 0.026212461292743683, 0.029737722128629684, 0.033262986689805984, 0.03678824380040169 ] } }, "transformer.layers.25.2.to_out.0.weight": { "min": -0.7031863331794739, "max": 0.6687424182891846, "mean": 4.257483305991627e-05, "std": 0.057892125099897385, "abs_mean": 0.039218515157699585, "sparsity": 0.0, "shape": [ 1024, 1024 ], "norm": 59.28053283691406, "elements": 1048576, "histogram": { "counts": [ 1, 12, 9, 18, 31, 51, 104, 206, 272, 158, 63, 32, 18, 15, 5, 1, 1, 1, 1, 1 ], "bin_edges": [ -0.21166379749774933, -0.18592607975006104, -0.16018837690353394, -0.13445065915584564, -0.10871294140815735, -0.08297522366046906, -0.057237520813941956, -0.03149980306625366, -0.005762085318565369, 0.019975632429122925, 0.04571335017681122, 0.07145105302333832, 0.09718875586986542, 0.1229264885187149, 0.148664191365242, 0.1744019240140915, 0.2001396268606186, 0.2258773297071457, 0.251615047454834, 0.2773527503013611, 0.30309048295021057 ] } }, "transformer.layers.25.2.to_out.0.bias": { "min": -0.0722241997718811, "max": 0.0676589161157608, "mean": -0.0001341316383332014, "std": 0.012878631241619587, "abs_mean": 0.008325816132128239, "sparsity": 0.0, "shape": [ 1024 ], "norm": 0.4119373559951782, "elements": 1024, "histogram": { "counts": [ 3, 3, 6, 1, 4, 5, 7, 38, 79, 223, 346, 171, 72, 23, 8, 2, 4, 2, 1, 2 ], "bin_edges": [ -0.0722241997718811, -0.06523004174232483, -0.05823588743805885, -0.05124173313379288, -0.0442475751042366, -0.03725342079997063, -0.03025926649570465, -0.023265108466148376, -0.0162709541618824, -0.009276799857616425, -0.00228264182806015, 0.004711516201496124, 0.011705666780471802, 0.018699824810028076, 0.02569398283958435, 0.03268813341856003, 0.0396822914481163, 0.04667644947767258, 0.053670600056648254, 0.06066475808620453, 0.0676589161157608 ] } }, "transformer.layers.25.3.g": { "min": 0.38035547733306885, "max": 1.3902052640914917, "mean": 1.066498041152954, "std": 0.21949008107185364, "abs_mean": 1.066498041152954, "sparsity": 0.0, "shape": [ 1024 ], "norm": 34.842498779296875, "elements": 1024, "histogram": { "counts": [ 2, 7, 11, 24, 24, 30, 16, 20, 26, 38, 39, 43, 59, 60, 89, 119, 174, 135, 69, 15 ], "bin_edges": [ 0.38035547733306885, 0.43084797263145447, 0.4813404679298401, 0.5318329334259033, 0.5823254585266113, 0.6328179240226746, 0.6833103895187378, 0.7338029146194458, 0.784295380115509, 0.8347878456115723, 0.8852803707122803, 0.9357728362083435, 0.9862653017044067, 1.0367578268051147, 1.0872502326965332, 1.1377427577972412, 1.1882352828979492, 1.2387278079986572, 1.2892203330993652, 1.3397127389907837, 1.3902052640914917 ] } }, "transformer.layers.25.4.ff.0.0.weight": { "min": -0.6164002418518066, "max": 0.7182905673980713, "mean": 0.00011321296915411949, "std": 0.05802781134843826, "abs_mean": 0.0431867316365242, "sparsity": 0.0, "shape": [ 4096, 1024 ], "norm": 118.82687377929688, "elements": 4194304, "histogram": { "counts": [ 1, 3, 8, 11, 27, 53, 87, 150, 198, 193, 132, 66, 42, 20, 4, 3, 1, 0, 0, 1 ], "bin_edges": [ -0.22690832614898682, -0.20144495368003845, -0.1759815663099289, -0.15051819384098053, -0.12505480647087097, -0.09959143400192261, -0.07412806153297424, -0.048664674162864685, -0.02320130169391632, 0.0022620707750320435, 0.0277254581451416, 0.053188830614089966, 0.07865220308303833, 0.1041155755519867, 0.12957897782325745, 0.1550423502922058, 0.18050572276115417, 0.20596909523010254, 0.2314324676990509, 0.25689586997032166, 0.2823592722415924 ] } }, "transformer.layers.25.4.ff.0.0.bias": { "min": -0.2184617668390274, "max": 0.22462666034698486, "mean": 0.006169781554490328, "std": 0.04965030029416084, "abs_mean": 0.035798974335193634, "sparsity": 0.0, "shape": [ 4096 ], "norm": 3.201674222946167, "elements": 4096, "histogram": { "counts": [ 1, 1, 1, 0, 4, 13, 27, 78, 190, 228, 194, 93, 54, 30, 30, 26, 20, 6, 3, 1 ], "bin_edges": [ -0.20836390554904938, -0.18725517392158508, -0.16614645719528198, -0.1450377255678177, -0.1239289939403534, -0.1028202623128891, -0.081711545586586, -0.060602813959121704, -0.03949408233165741, -0.018385350704193115, 0.002723380923271179, 0.02383209764957428, 0.04494081437587738, 0.06604956090450287, 0.08715827763080597, 0.10826702415943146, 0.12937574088573456, 0.15048445761203766, 0.17159320414066315, 0.19270192086696625, 0.21381068229675293 ] } }, "transformer.layers.25.4.ff.2.weight": { "min": -0.6297575831413269, "max": 0.8895801901817322, "mean": 1.2445923857740127e-05, "std": 0.023545311763882637, "abs_mean": 0.015362618491053581, "sparsity": 0.0, "shape": [ 1024, 4096 ], "norm": 48.21366500854492, "elements": 4194304, "histogram": { "counts": [ 5, 3, 5, 35, 229, 504, 180, 29, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1 ], "bin_edges": [ -0.12258616834878922, -0.10009890794754028, -0.07761164009571075, -0.05512437969446182, -0.032637111842632294, -0.010149843990802765, 0.01233740895986557, 0.0348246768116951, 0.05731194466352463, 0.07979921251535416, 0.10228648036718369, 0.12477373331785202, 0.14726099371910095, 0.16974827647209167, 0.19223552942276, 0.21472281217575073, 0.23721006512641907, 0.2596973180770874, 0.2821846008300781, 0.30467185378074646, 0.3271591365337372 ] } }, "transformer.layers.25.4.ff.2.bias": { "min": -0.506031334400177, "max": 0.47297078371047974, "mean": -0.0030135007109493017, "std": 0.0691458210349083, "abs_mean": 0.029873624444007874, "sparsity": 0.0, "shape": [ 1024 ], "norm": 2.213686943054199, "elements": 1024, "histogram": { "counts": [ 4, 4, 2, 2, 2, 3, 1, 6, 6, 201, 670, 87, 1, 2, 0, 1, 1, 1, 1, 5 ], "bin_edges": [ -0.506031334400177, -0.45708122849464417, -0.40813112258911133, -0.3591810166835785, -0.31023091077804565, -0.2612808048725128, -0.21233069896697998, -0.16338059306144714, -0.1144304871559143, -0.06548038125038147, -0.016530275344848633, 0.03241986036300659, 0.08136993646621704, 0.1303200125694275, 0.17927014827728271, 0.22822028398513794, 0.2771703600883484, 0.32612043619155884, 0.37507057189941406, 0.4240207076072693, 0.47297078371047974 ] } }, "transformer.norm_out.g": { "min": 0.5383259057998657, "max": 1.1772801876068115, "mean": 0.7824772596359253, "std": 0.09824033081531525, "abs_mean": 0.7824772596359253, "sparsity": 0.0, "shape": [ 1024 ], "norm": 25.23565673828125, "elements": 1024, "histogram": { "counts": [ 3, 19, 63, 60, 66, 97, 101, 119, 118, 123, 91, 66, 35, 21, 13, 2, 2, 0, 0, 1 ], "bin_edges": [ 0.5383259057998657, 0.5702736377716064, 0.6022213101387024, 0.6341690421104431, 0.6661167740821838, 0.6980644464492798, 0.7300121784210205, 0.7619599103927612, 0.793907642364502, 0.8258553147315979, 0.8578030467033386, 0.8897507190704346, 0.9216984510421753, 0.953646183013916, 0.9855939149856567, 1.0175416469573975, 1.0494892597198486, 1.081437110900879, 1.11338472366333, 1.1453324556350708, 1.1772801876068115 ] } }, "transformer.proj_out.weight": { "min": -0.26664498448371887, "max": 0.2126948982477188, "mean": -0.00022273289505392313, "std": 0.05400582030415535, "abs_mean": 0.043136853724718094, "sparsity": 0.0, "shape": [ 100, 1024 ], "norm": 17.281917572021484, "elements": 102400, "histogram": { "counts": [ 3, 3, 7, 10, 28, 36, 64, 82, 123, 132, 126, 130, 94, 71, 41, 28, 11, 9, 0, 2 ], "bin_edges": [ -0.18576863408088684, -0.16740620136260986, -0.1490437537431717, -0.13068132102489471, -0.11231888085603714, -0.09395644068717957, -0.07559400796890259, -0.057231560349464417, -0.03886912763118744, -0.02050669491291046, -0.00214424729347229, 0.016218185424804688, 0.034580618143081665, 0.052943065762519836, 0.07130551338195801, 0.08966794610023499, 0.10803037881851196, 0.12639281153678894, 0.14475524425506592, 0.16311770677566528, 0.18148015439510345 ] } }, "transformer.proj_out.bias": { "min": -0.23798410594463348, "max": 0.014864158816635609, "mean": -0.04389958456158638, "std": 0.03423725813627243, "abs_mean": 0.045042671263217926, "sparsity": 0.0, "shape": [ 100 ], "norm": 0.5556654930114746, "elements": 100 } }, "layer_importance_scores": { "transformer.time_embed.time_mlp.0.weight": 85.03633259568599, "transformer.time_embed.time_mlp.0.bias": 85.0001419242019, "transformer.time_embed.time_mlp.2.weight": 85.14533038274396, "transformer.time_embed.time_mlp.2.bias": 85.0001419242019, "transformer.text_embed.text_embed.weight": 85.03528701347982, "transformer.input_embed.proj.weight": 80.04257726056952, "transformer.input_embed.proj.bias": 80.0001419242019, "transformer.input_embed.conv_pos_embed.conv1d.0.weight": 85.28157761656642, "transformer.input_embed.conv_pos_embed.conv1d.0.bias": 46.03894230581882, "transformer.input_embed.conv_pos_embed.conv1d.2.weight": 85.28157761656642, "transformer.input_embed.conv_pos_embed.conv1d.2.bias": 51.933818153226454, "transformer.layers.0.1.g": 11.532497627358527, "transformer.layers.0.2.to_q.weight": 60.14533038274396, "transformer.layers.0.2.to_q.bias": 60.0001419242019, "transformer.layers.0.2.to_k.weight": 60.14533038274396, "transformer.layers.0.2.to_k.bias": 60.0001419242019, "transformer.layers.0.2.to_v.weight": 60.14533038274396, "transformer.layers.0.2.to_v.bias": 60.0001419242019, "transformer.layers.0.2.to_out.0.weight": 60.14533038274396, "transformer.layers.0.2.to_out.0.bias": 60.0001419242019, "transformer.layers.0.3.g": 11.959340362928835, "transformer.layers.0.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.0.4.ff.0.0.bias": 24.463798488610298, "transformer.layers.0.4.ff.2.weight": 60.58132153097584, "transformer.layers.0.4.ff.2.bias": 23.219212211070626, "transformer.layers.1.1.g": 11.537370238647217, "transformer.layers.1.2.to_q.weight": 60.14533038274396, "transformer.layers.1.2.to_q.bias": 60.0001419242019, "transformer.layers.1.2.to_k.weight": 60.14533038274396, "transformer.layers.1.2.to_k.bias": 60.0001419242019, "transformer.layers.1.2.to_v.weight": 60.14533038274396, "transformer.layers.1.2.to_v.bias": 60.0001419242019, "transformer.layers.1.2.to_out.0.weight": 60.14533038274396, "transformer.layers.1.2.to_out.0.bias": 60.0001419242019, "transformer.layers.1.3.g": 11.461919656381479, "transformer.layers.1.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.1.4.ff.0.0.bias": 18.705593070190794, "transformer.layers.1.4.ff.2.weight": 60.58132153097584, "transformer.layers.1.4.ff.2.bias": 38.259093163379504, "transformer.layers.2.1.g": 11.321299503885337, "transformer.layers.2.2.to_q.weight": 60.14533038274396, "transformer.layers.2.2.to_q.bias": 60.0001419242019, "transformer.layers.2.2.to_k.weight": 60.14533038274396, "transformer.layers.2.2.to_k.bias": 60.0001419242019, "transformer.layers.2.2.to_v.weight": 60.14533038274396, "transformer.layers.2.2.to_v.bias": 60.0001419242019, "transformer.layers.2.2.to_out.0.weight": 60.14533038274396, "transformer.layers.2.2.to_out.0.bias": 60.0001419242019, "transformer.layers.2.3.g": 11.455490462959915, "transformer.layers.2.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.2.4.ff.0.0.bias": 18.206288585332846, "transformer.layers.2.4.ff.2.weight": 60.58132153097584, "transformer.layers.2.4.ff.2.bias": 60.0001419242019, "transformer.layers.3.1.g": 11.123831840415704, "transformer.layers.3.2.to_q.weight": 60.14533038274396, "transformer.layers.3.2.to_q.bias": 60.0001419242019, "transformer.layers.3.2.to_k.weight": 60.14533038274396, "transformer.layers.3.2.to_k.bias": 60.0001419242019, "transformer.layers.3.2.to_v.weight": 60.14533038274396, "transformer.layers.3.2.to_v.bias": 60.0001419242019, "transformer.layers.3.2.to_out.0.weight": 60.14533038274396, "transformer.layers.3.2.to_out.0.bias": 60.0001419242019, "transformer.layers.3.3.g": 11.376823885895679, "transformer.layers.3.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.3.4.ff.0.0.bias": 18.229989548202674, "transformer.layers.3.4.ff.2.weight": 60.58132153097584, "transformer.layers.3.4.ff.2.bias": 60.0001419242019, "transformer.layers.4.1.g": 10.92711471707068, "transformer.layers.4.2.to_q.weight": 60.14533038274396, "transformer.layers.4.2.to_q.bias": 60.0001419242019, "transformer.layers.4.2.to_k.weight": 60.14533038274396, "transformer.layers.4.2.to_k.bias": 60.0001419242019, "transformer.layers.4.2.to_v.weight": 60.14533038274396, "transformer.layers.4.2.to_v.bias": 60.0001419242019, "transformer.layers.4.2.to_out.0.weight": 60.14533038274396, "transformer.layers.4.2.to_out.0.bias": 60.0001419242019, "transformer.layers.4.3.g": 10.986938392283891, "transformer.layers.4.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.4.4.ff.0.0.bias": 17.15491654224864, "transformer.layers.4.4.ff.2.weight": 60.58132153097584, "transformer.layers.4.4.ff.2.bias": 60.0001419242019, "transformer.layers.5.1.g": 10.90192046488536, "transformer.layers.5.2.to_q.weight": 60.14533038274396, "transformer.layers.5.2.to_q.bias": 60.0001419242019, "transformer.layers.5.2.to_k.weight": 60.14533038274396, "transformer.layers.5.2.to_k.bias": 60.0001419242019, "transformer.layers.5.2.to_v.weight": 60.14533038274396, "transformer.layers.5.2.to_v.bias": 60.0001419242019, "transformer.layers.5.2.to_out.0.weight": 60.14533038274396, "transformer.layers.5.2.to_out.0.bias": 60.0001419242019, "transformer.layers.5.3.g": 10.853532619025422, "transformer.layers.5.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.5.4.ff.0.0.bias": 17.052553682686636, "transformer.layers.5.4.ff.2.weight": 60.58132153097584, "transformer.layers.5.4.ff.2.bias": 60.0001419242019, "transformer.layers.6.1.g": 10.874520652442698, "transformer.layers.6.2.to_q.weight": 60.14533038274396, "transformer.layers.6.2.to_q.bias": 60.0001419242019, "transformer.layers.6.2.to_k.weight": 60.14533038274396, "transformer.layers.6.2.to_k.bias": 60.0001419242019, "transformer.layers.6.2.to_v.weight": 60.14533038274396, "transformer.layers.6.2.to_v.bias": 60.0001419242019, "transformer.layers.6.2.to_out.0.weight": 60.14533038274396, "transformer.layers.6.2.to_out.0.bias": 60.0001419242019, "transformer.layers.6.3.g": 10.785119275660719, "transformer.layers.6.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.6.4.ff.0.0.bias": 16.446733745711406, "transformer.layers.6.4.ff.2.weight": 60.58132153097584, "transformer.layers.6.4.ff.2.bias": 60.0001419242019, "transformer.layers.7.1.g": 10.737621414760007, "transformer.layers.7.2.to_q.weight": 60.14533038274396, "transformer.layers.7.2.to_q.bias": 60.0001419242019, "transformer.layers.7.2.to_k.weight": 60.14533038274396, "transformer.layers.7.2.to_k.bias": 60.0001419242019, "transformer.layers.7.2.to_v.weight": 60.14533038274396, "transformer.layers.7.2.to_v.bias": 60.0001419242019, "transformer.layers.7.2.to_out.0.weight": 60.14533038274396, "transformer.layers.7.2.to_out.0.bias": 60.0001419242019, "transformer.layers.7.3.g": 10.77605944709332, "transformer.layers.7.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.7.4.ff.0.0.bias": 16.069873949263954, "transformer.layers.7.4.ff.2.weight": 60.58132153097584, "transformer.layers.7.4.ff.2.bias": 60.0001419242019, "transformer.layers.8.1.g": 10.71839406869809, "transformer.layers.8.2.to_q.weight": 60.14533038274396, "transformer.layers.8.2.to_q.bias": 60.0001419242019, "transformer.layers.8.2.to_k.weight": 60.14533038274396, "transformer.layers.8.2.to_k.bias": 60.0001419242019, "transformer.layers.8.2.to_v.weight": 60.14533038274396, "transformer.layers.8.2.to_v.bias": 60.0001419242019, "transformer.layers.8.2.to_out.0.weight": 60.14533038274396, "transformer.layers.8.2.to_out.0.bias": 60.0001419242019, "transformer.layers.8.3.g": 10.7574903396269, "transformer.layers.8.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.8.4.ff.0.0.bias": 15.766046880990155, "transformer.layers.8.4.ff.2.weight": 60.58132153097584, "transformer.layers.8.4.ff.2.bias": 60.0001419242019, "transformer.layers.9.1.g": 10.733062207419012, "transformer.layers.9.2.to_q.weight": 60.14533038274396, "transformer.layers.9.2.to_q.bias": 60.0001419242019, "transformer.layers.9.2.to_k.weight": 60.14533038274396, "transformer.layers.9.2.to_k.bias": 60.0001419242019, "transformer.layers.9.2.to_v.weight": 60.14533038274396, "transformer.layers.9.2.to_v.bias": 60.0001419242019, "transformer.layers.9.2.to_out.0.weight": 60.14533038274396, "transformer.layers.9.2.to_out.0.bias": 60.0001419242019, "transformer.layers.9.3.g": 10.823180888090741, "transformer.layers.9.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.9.4.ff.0.0.bias": 15.847623263476386, "transformer.layers.9.4.ff.2.weight": 60.58132153097584, "transformer.layers.9.4.ff.2.bias": 60.0001419242019, "transformer.layers.10.1.g": 10.716516084338693, "transformer.layers.10.2.to_q.weight": 60.14533038274396, "transformer.layers.10.2.to_q.bias": 60.0001419242019, "transformer.layers.10.2.to_k.weight": 60.14533038274396, "transformer.layers.10.2.to_k.bias": 60.0001419242019, "transformer.layers.10.2.to_v.weight": 60.14533038274396, "transformer.layers.10.2.to_v.bias": 60.0001419242019, "transformer.layers.10.2.to_out.0.weight": 60.14533038274396, "transformer.layers.10.2.to_out.0.bias": 60.0001419242019, "transformer.layers.10.3.g": 10.76196498496441, "transformer.layers.10.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.10.4.ff.0.0.bias": 15.759326001570306, "transformer.layers.10.4.ff.2.weight": 60.58132153097584, "transformer.layers.10.4.ff.2.bias": 60.0001419242019, "transformer.layers.11.1.g": 10.000141924201898, "transformer.layers.11.2.to_q.weight": 60.14533038274396, "transformer.layers.11.2.to_q.bias": 60.0001419242019, "transformer.layers.11.2.to_k.weight": 60.14533038274396, "transformer.layers.11.2.to_k.bias": 60.0001419242019, "transformer.layers.11.2.to_v.weight": 0.14533038274395965, "transformer.layers.11.2.to_v.bias": 0.0001419242018983981, "transformer.layers.11.2.to_out.0.weight": 0.14533038274395965, "transformer.layers.11.2.to_out.0.bias": 0.0001419242018983981, "transformer.layers.11.3.g": 10.000141924201898, "transformer.layers.11.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.11.4.ff.0.0.bias": 60.00056769680759, "transformer.layers.11.4.ff.2.weight": 0.5813215309758386, "transformer.layers.11.4.ff.2.bias": 0.0001419242018983981, "transformer.layers.12.1.g": 10.665369544246982, "transformer.layers.12.2.to_q.weight": 60.14533038274396, "transformer.layers.12.2.to_q.bias": 60.0001419242019, "transformer.layers.12.2.to_k.weight": 60.14533038274396, "transformer.layers.12.2.to_k.bias": 60.0001419242019, "transformer.layers.12.2.to_v.weight": 60.14533038274396, "transformer.layers.12.2.to_v.bias": 60.0001419242019, "transformer.layers.12.2.to_out.0.weight": 60.14533038274396, "transformer.layers.12.2.to_out.0.bias": 60.0001419242019, "transformer.layers.12.3.g": 10.74358912251763, "transformer.layers.12.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.12.4.ff.0.0.bias": 15.775313068968435, "transformer.layers.12.4.ff.2.weight": 60.58132153097584, "transformer.layers.12.4.ff.2.bias": 60.0001419242019, "transformer.layers.13.0.weight": 60.29066076548792, "transformer.layers.13.1.g": 10.780254387046963, "transformer.layers.13.2.to_q.weight": 60.14533038274396, "transformer.layers.13.2.to_q.bias": 60.0001419242019, "transformer.layers.13.2.to_k.weight": 60.14533038274396, "transformer.layers.13.2.to_k.bias": 60.0001419242019, "transformer.layers.13.2.to_v.weight": 60.14533038274396, "transformer.layers.13.2.to_v.bias": 60.0001419242019, "transformer.layers.13.2.to_out.0.weight": 60.14533038274396, "transformer.layers.13.2.to_out.0.bias": 60.0001419242019, "transformer.layers.13.3.g": 10.723090044959745, "transformer.layers.13.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.13.4.ff.0.0.bias": 14.433770644846359, "transformer.layers.13.4.ff.2.weight": 60.58132153097584, "transformer.layers.13.4.ff.2.bias": 60.0001419242019, "transformer.layers.14.0.weight": 50.29554357798792, "transformer.layers.14.1.g": 10.000141924201898, "transformer.layers.14.2.to_q.weight": 60.14533038274396, "transformer.layers.14.2.to_q.bias": 60.0001419242019, "transformer.layers.14.2.to_k.weight": 60.14532084600079, "transformer.layers.14.2.to_k.bias": 60.0001419242019, "transformer.layers.14.2.to_v.weight": 0.14533038274395965, "transformer.layers.14.2.to_v.bias": 0.0001419242018983981, "transformer.layers.14.2.to_out.0.weight": 0.14533038274395965, "transformer.layers.14.2.to_out.0.bias": 0.0001419242018983981, "transformer.layers.14.3.g": 10.000141924201898, "transformer.layers.14.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.14.4.ff.0.0.bias": 60.00056769680759, "transformer.layers.14.4.ff.2.weight": 0.5813215309758386, "transformer.layers.14.4.ff.2.bias": 0.0001419242018983981, "transformer.layers.15.0.weight": 60.29066076548792, "transformer.layers.15.1.g": 10.786727982493778, "transformer.layers.15.2.to_q.weight": 60.14533038274396, "transformer.layers.15.2.to_q.bias": 60.0001419242019, "transformer.layers.15.2.to_k.weight": 60.14533038274396, "transformer.layers.15.2.to_k.bias": 60.0001419242019, "transformer.layers.15.2.to_v.weight": 60.14533038274396, "transformer.layers.15.2.to_v.bias": 60.0001419242019, "transformer.layers.15.2.to_out.0.weight": 60.14533038274396, "transformer.layers.15.2.to_out.0.bias": 60.0001419242019, "transformer.layers.15.3.g": 10.558397360427929, "transformer.layers.15.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.15.4.ff.0.0.bias": 14.440700574156022, "transformer.layers.15.4.ff.2.weight": 60.58132153097584, "transformer.layers.15.4.ff.2.bias": 60.0001419242019, "transformer.layers.16.0.weight": 60.29066076548792, "transformer.layers.16.1.g": 10.749619248285043, "transformer.layers.16.2.to_q.weight": 60.14533038274396, "transformer.layers.16.2.to_q.bias": 60.0001419242019, "transformer.layers.16.2.to_k.weight": 60.14533038274396, "transformer.layers.16.2.to_k.bias": 60.0001419242019, "transformer.layers.16.2.to_v.weight": 60.14533038274396, "transformer.layers.16.2.to_v.bias": 60.0001419242019, "transformer.layers.16.2.to_out.0.weight": 60.14533038274396, "transformer.layers.16.2.to_out.0.bias": 60.0001419242019, "transformer.layers.16.3.g": 10.533921774332137, "transformer.layers.16.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.16.4.ff.0.0.bias": 15.171028483803623, "transformer.layers.16.4.ff.2.weight": 60.58132153097584, "transformer.layers.16.4.ff.2.bias": 60.0001419242019, "transformer.layers.17.0.weight": 60.29066076548792, "transformer.layers.17.1.g": 10.770886668827734, "transformer.layers.17.2.to_q.weight": 60.14533038274396, "transformer.layers.17.2.to_q.bias": 60.0001419242019, "transformer.layers.17.2.to_k.weight": 60.14533038274396, "transformer.layers.17.2.to_k.bias": 60.0001419242019, "transformer.layers.17.2.to_v.weight": 60.14533038274396, "transformer.layers.17.2.to_v.bias": 60.0001419242019, "transformer.layers.17.2.to_out.0.weight": 60.14533038274396, "transformer.layers.17.2.to_out.0.bias": 60.0001419242019, "transformer.layers.17.3.g": 10.494831667837571, "transformer.layers.17.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.17.4.ff.0.0.bias": 15.415363766846895, "transformer.layers.17.4.ff.2.weight": 60.58132153097584, "transformer.layers.17.4.ff.2.bias": 60.0001419242019, "transformer.layers.18.0.weight": 60.29066076548792, "transformer.layers.18.1.g": 10.692741576840318, "transformer.layers.18.2.to_q.weight": 60.14533038274396, "transformer.layers.18.2.to_q.bias": 60.0001419242019, "transformer.layers.18.2.to_k.weight": 60.14533038274396, "transformer.layers.18.2.to_k.bias": 60.0001419242019, "transformer.layers.18.2.to_v.weight": 60.14533038274396, "transformer.layers.18.2.to_v.bias": 60.0001419242019, "transformer.layers.18.2.to_out.0.weight": 60.14533038274396, "transformer.layers.18.2.to_out.0.bias": 60.0001419242019, "transformer.layers.18.3.g": 10.514765366862893, "transformer.layers.18.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.18.4.ff.0.0.bias": 15.916675791742597, "transformer.layers.18.4.ff.2.weight": 60.58132153097584, "transformer.layers.18.4.ff.2.bias": 60.0001419242019, "transformer.layers.19.0.weight": 60.29066076548792, "transformer.layers.19.1.g": 10.767040605554461, "transformer.layers.19.2.to_q.weight": 60.14533038274396, "transformer.layers.19.2.to_q.bias": 60.0001419242019, "transformer.layers.19.2.to_k.weight": 60.14533038274396, "transformer.layers.19.2.to_k.bias": 60.0001419242019, "transformer.layers.19.2.to_v.weight": 60.14533038274396, "transformer.layers.19.2.to_v.bias": 60.0001419242019, "transformer.layers.19.2.to_out.0.weight": 60.14533038274396, "transformer.layers.19.2.to_out.0.bias": 60.0001419242019, "transformer.layers.19.3.g": 10.55900162466435, "transformer.layers.19.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.19.4.ff.0.0.bias": 16.944800765454332, "transformer.layers.19.4.ff.2.weight": 60.58132153097584, "transformer.layers.19.4.ff.2.bias": 60.0001419242019, "transformer.layers.20.0.weight": 60.29066076548792, "transformer.layers.20.1.g": 10.798194311900344, "transformer.layers.20.2.to_q.weight": 60.14533038274396, "transformer.layers.20.2.to_q.bias": 60.0001419242019, "transformer.layers.20.2.to_k.weight": 60.14533038274396, "transformer.layers.20.2.to_k.bias": 60.0001419242019, "transformer.layers.20.2.to_v.weight": 60.14533038274396, "transformer.layers.20.2.to_v.bias": 60.0001419242019, "transformer.layers.20.2.to_out.0.weight": 60.14533038274396, "transformer.layers.20.2.to_out.0.bias": 60.0001419242019, "transformer.layers.20.3.g": 10.533009943982734, "transformer.layers.20.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.20.4.ff.0.0.bias": 17.8252610437578, "transformer.layers.20.4.ff.2.weight": 60.58132153097584, "transformer.layers.20.4.ff.2.bias": 60.0001419242019, "transformer.layers.21.0.weight": 60.29066076548792, "transformer.layers.21.1.g": 10.834385789775144, "transformer.layers.21.2.to_q.weight": 60.14533038274396, "transformer.layers.21.2.to_q.bias": 60.0001419242019, "transformer.layers.21.2.to_k.weight": 60.14533038274396, "transformer.layers.21.2.to_k.bias": 60.0001419242019, "transformer.layers.21.2.to_v.weight": 60.14533038274396, "transformer.layers.21.2.to_v.bias": 60.0001419242019, "transformer.layers.21.2.to_out.0.weight": 60.14533038274396, "transformer.layers.21.2.to_out.0.bias": 60.0001419242019, "transformer.layers.21.3.g": 10.61522933626405, "transformer.layers.21.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.21.4.ff.0.0.bias": 17.882948718734298, "transformer.layers.21.4.ff.2.weight": 60.58132153097584, "transformer.layers.21.4.ff.2.bias": 60.0001419242019, "transformer.layers.22.0.weight": 60.29066076548792, "transformer.layers.22.1.g": 10.873351189083357, "transformer.layers.22.2.to_q.weight": 60.14533038274396, "transformer.layers.22.2.to_q.bias": 60.0001419242019, "transformer.layers.22.2.to_k.weight": 60.14533038274396, "transformer.layers.22.2.to_k.bias": 60.0001419242019, "transformer.layers.22.2.to_v.weight": 60.14533038274396, "transformer.layers.22.2.to_v.bias": 60.0001419242019, "transformer.layers.22.2.to_out.0.weight": 60.14533038274396, "transformer.layers.22.2.to_out.0.bias": 60.0001419242019, "transformer.layers.22.3.g": 10.738055947202142, "transformer.layers.22.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.22.4.ff.0.0.bias": 20.820610760559745, "transformer.layers.22.4.ff.2.weight": 60.58132153097584, "transformer.layers.22.4.ff.2.bias": 60.0001419242019, "transformer.layers.23.0.weight": 60.29066076548792, "transformer.layers.23.1.g": 10.960286446122295, "transformer.layers.23.2.to_q.weight": 60.14533038274396, "transformer.layers.23.2.to_q.bias": 60.0001419242019, "transformer.layers.23.2.to_k.weight": 60.14533038274396, "transformer.layers.23.2.to_k.bias": 60.0001419242019, "transformer.layers.23.2.to_v.weight": 60.14533038274396, "transformer.layers.23.2.to_v.bias": 60.0001419242019, "transformer.layers.23.2.to_out.0.weight": 60.14533038274396, "transformer.layers.23.2.to_out.0.bias": 60.0001419242019, "transformer.layers.23.3.g": 10.717452300054163, "transformer.layers.23.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.23.4.ff.0.0.bias": 21.784515291168272, "transformer.layers.23.4.ff.2.weight": 60.58132153097584, "transformer.layers.23.4.ff.2.bias": 60.0001419242019, "transformer.layers.24.0.weight": 60.29066076548792, "transformer.layers.24.1.g": 11.384080906248242, "transformer.layers.24.2.to_q.weight": 60.14533038274396, "transformer.layers.24.2.to_q.bias": 60.0001419242019, "transformer.layers.24.2.to_k.weight": 60.14533038274396, "transformer.layers.24.2.to_k.bias": 60.0001419242019, "transformer.layers.24.2.to_v.weight": 60.14533038274396, "transformer.layers.24.2.to_v.bias": 60.0001419242019, "transformer.layers.24.2.to_out.0.weight": 60.14533038274396, "transformer.layers.24.2.to_out.0.bias": 60.0001419242019, "transformer.layers.24.3.g": 11.157178725401591, "transformer.layers.24.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.24.4.ff.0.0.bias": 23.328577211228623, "transformer.layers.24.4.ff.2.weight": 60.58132153097584, "transformer.layers.24.4.ff.2.bias": 60.0001419242019, "transformer.layers.25.0.weight": 60.29066076548792, "transformer.layers.25.1.g": 12.176959419250716, "transformer.layers.25.2.to_q.weight": 60.14533038274396, "transformer.layers.25.2.to_q.bias": 60.0001419242019, "transformer.layers.25.2.to_k.weight": 60.14533038274396, "transformer.layers.25.2.to_k.bias": 60.0001419242019, "transformer.layers.25.2.to_v.weight": 60.14533038274396, "transformer.layers.25.2.to_v.bias": 60.0001419242019, "transformer.layers.25.2.to_out.0.weight": 60.14533038274396, "transformer.layers.25.2.to_out.0.bias": 60.0001419242019, "transformer.layers.25.3.g": 12.058186783192642, "transformer.layers.25.4.ff.0.0.weight": 60.58132153097584, "transformer.layers.25.4.ff.0.0.bias": 60.00056769680759, "transformer.layers.25.4.ff.2.weight": 60.58132153097584, "transformer.layers.25.4.ff.2.bias": 60.0001419242019, "transformer.norm_out.g": 26.255645897071666, "transformer.proj_out.weight": 80.01419242018984, "transformer.proj_out.bias": 37.799007512730114 } }