zombieso2 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
5647144 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43031466007232666,
"max": 0.298143208026886,
"mean": -0.0025431362446397543,
"std": 0.042562514543533325,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.0631568506360054,
"max": 0.10771193355321884,
"mean": 0.0006426331819966435,
"std": 0.03407834470272064,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4127056896686554,
"max": 0.8369137644767761,
"mean": -0.00020141302957199514,
"std": 0.024111632257699966,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11548846960067749,
"max": 0.3221578299999237,
"mean": -0.0009410656057298183,
"std": 0.019580261781811714,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7946255207061768,
"max": 2.873370885848999,
"mean": -0.0003634353051893413,
"std": 0.6154844164848328,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2794482707977295,
"max": 0.38173243403434753,
"mean": 0.0004242636787239462,
"std": 0.042748358100652695,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22289495170116425,
"max": 0.21001911163330078,
"mean": -0.004489608108997345,
"std": 0.040950216352939606,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4283224046230316,
"max": 0.4761110544204712,
"mean": 3.962942628277233e-06,
"std": 0.02451062761247158,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32575708627700806,
"max": 0.1571168750524521,
"mean": -0.04673216491937637,
"std": 0.051645807921886444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4105567932128906,
"max": 0.3547790050506592,
"mean": -0.0001310346560785547,
"std": 0.02360442653298378,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.23018451035022736,
"max": 0.2630932033061981,
"mean": -0.029156308621168137,
"std": 0.04940544068813324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2545531988143921,
"max": 0.8213090300559998,
"mean": 0.5256362557411194,
"std": 0.08106369525194168,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2971626818180084,
"max": 0.26604607701301575,
"mean": -0.0004256928223185241,
"std": 0.03210251033306122,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09291917830705643,
"max": 0.1250312328338623,
"mean": 0.0006477435817942023,
"std": 0.025753259658813477,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29085373878479004,
"max": 0.28159603476524353,
"mean": -7.506589463446289e-05,
"std": 0.030931703746318817,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.906967639923096,
"max": 5.821649074554443,
"mean": -0.009350163862109184,
"std": 1.296647071838379,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42530331015586853,
"max": 0.3440260589122772,
"mean": 9.807322931010276e-05,
"std": 0.02995346300303936,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.029081525281071663,
"max": 0.02767445333302021,
"mean": -0.00032374687725678086,
"std": 0.012576405890285969,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45424115657806396,
"max": 0.4482896625995636,
"mean": 2.3885608243290335e-05,
"std": 0.02385384775698185,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08883396536111832,
"max": 0.09114022552967072,
"mean": 0.00228882092051208,
"std": 0.01952745020389557,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26677191257476807,
"max": 1.0577468872070312,
"mean": 0.53135746717453,
"std": 0.10473316162824631,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5746102333068848,
"max": 0.6084363460540771,
"mean": -0.00043127068784087896,
"std": 0.03860073536634445,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18297578394412994,
"max": 0.0456179715692997,
"mean": -0.029477983713150024,
"std": 0.042657021433115005,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1673263311386108,
"max": 1.6341116428375244,
"mean": 0.00032315164571627975,
"std": 0.02769668586552143,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16250400245189667,
"max": 0.20589958131313324,
"mean": -0.02113456465303898,
"std": 0.027959568426012993,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22410069406032562,
"max": 0.8451111912727356,
"mean": 0.48777928948402405,
"std": 0.07542530447244644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25582441687583923,
"max": 0.30595168471336365,
"mean": -6.705071427859366e-06,
"std": 0.03347504884004593,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09550327807664871,
"max": 0.11064136773347855,
"mean": 6.668796413578093e-05,
"std": 0.026976482942700386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2973037660121918,
"max": 0.29644775390625,
"mean": 5.341449286788702e-05,
"std": 0.032546162605285645,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.17097806930542,
"max": 5.091113090515137,
"mean": -0.01462231483310461,
"std": 1.1586002111434937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34501704573631287,
"max": 0.34340131282806396,
"mean": 7.8546792792622e-05,
"std": 0.030061908066272736,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.036109186708927155,
"max": 0.03340720757842064,
"mean": -0.00014173206000123173,
"std": 0.013041709549725056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3156168460845947,
"max": 0.3752053380012512,
"mean": -2.0681722162407823e-05,
"std": 0.02405940182507038,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10555326193571091,
"max": 0.12231862545013428,
"mean": -0.0019678983371704817,
"std": 0.028872456401586533,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3113996386528015,
"max": 1.1224051713943481,
"mean": 0.6664633750915527,
"std": 0.0980152115225792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8727887272834778,
"max": 0.6275914907455444,
"mean": 0.0016750607173889875,
"std": 0.047438763082027435,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27183517813682556,
"max": 0.034259725362062454,
"mean": -0.046628981828689575,
"std": 0.04063701629638672,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9230329394340515,
"max": 0.9648618102073669,
"mean": 0.0010213888017460704,
"std": 0.04070665314793587,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14468412101268768,
"max": 0.07505139708518982,
"mean": -0.009096229448914528,
"std": 0.025706371292471886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.24036771059036255,
"max": 0.7140315771102905,
"mean": 0.4473647475242615,
"std": 0.05951203405857086,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27264565229415894,
"max": 0.29809534549713135,
"mean": 9.332510671811178e-06,
"std": 0.03546958044171333,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11950661987066269,
"max": 0.11869802325963974,
"mean": 0.0007616454968228936,
"std": 0.02764517441391945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2813079059123993,
"max": 0.28023794293403625,
"mean": -7.719700079178438e-05,
"std": 0.0350990891456604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5128581523895264,
"max": 2.524867296218872,
"mean": 0.026786239817738533,
"std": 0.5873143672943115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2213059961795807,
"max": 0.2717853784561157,
"mean": 2.9610819183290005e-06,
"std": 0.030732687562704086,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03361261636018753,
"max": 0.03129349276423454,
"mean": 0.00011305588122922927,
"std": 0.012413612566888332,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23544403910636902,
"max": 0.23186075687408447,
"mean": 5.69116891711019e-05,
"std": 0.025696195662021637,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13601461052894592,
"max": 0.12754406034946442,
"mean": -0.005499254446476698,
"std": 0.03998684883117676,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35436785221099854,
"max": 1.1737076044082642,
"mean": 0.7108283638954163,
"std": 0.10403098911046982,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6176053881645203,
"max": 0.5545136332511902,
"mean": 0.0011602240847423673,
"std": 0.04611964151263237,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18876661360263824,
"max": 0.024967461824417114,
"mean": -0.03485583886504173,
"std": 0.028641268610954285,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1323436498641968,
"max": 0.9720706939697266,
"mean": 0.00035946519346907735,
"std": 0.042347442358732224,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.598772406578064,
"max": 0.06287988275289536,
"mean": -0.004880873020738363,
"std": 0.028635544702410698,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3750710189342499,
"max": 0.9418790340423584,
"mean": 0.5926927328109741,
"std": 0.06721659004688263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3915771543979645,
"max": 0.3692559599876404,
"mean": 7.123942486941814e-05,
"std": 0.03718866407871246,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11907870322465897,
"max": 0.13665802776813507,
"mean": 0.0009319179225713015,
"std": 0.02926611341536045,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.619708240032196,
"max": 0.5092929005622864,
"mean": 1.5245183021761477e-05,
"std": 0.03644217178225517,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.197783470153809,
"max": 8.800565719604492,
"mean": -0.10938873887062073,
"std": 1.7007076740264893,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2768491804599762,
"max": 0.2400088757276535,
"mean": 5.314283407642506e-05,
"std": 0.032615404576063156,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05213421210646629,
"max": 0.03957239165902138,
"mean": 9.133941057370976e-05,
"std": 0.012963276356458664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23089444637298584,
"max": 0.2348451018333435,
"mean": -2.176157067879103e-05,
"std": 0.029391760006546974,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20456741750240326,
"max": 0.10572919249534607,
"mean": -0.00402758177369833,
"std": 0.03263704851269722,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3400026261806488,
"max": 1.0141218900680542,
"mean": 0.7010252475738525,
"std": 0.09696138650178909,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5654259324073792,
"max": 0.8335409760475159,
"mean": 0.0004151407047174871,
"std": 0.04230234771966934,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2119237780570984,
"max": 0.030580509454011917,
"mean": -0.03220224380493164,
"std": 0.026535935699939728,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7552511096000671,
"max": 0.7191816568374634,
"mean": -9.422379662282765e-06,
"std": 0.036842163652181625,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26383838057518005,
"max": 0.10599514842033386,
"mean": -0.0030335707124322653,
"std": 0.028880203142762184,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28429752588272095,
"max": 0.6961002945899963,
"mean": 0.49966490268707275,
"std": 0.046708256006240845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27950623631477356,
"max": 0.23444026708602905,
"mean": -0.0001112212921725586,
"std": 0.03876311331987381,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15468573570251465,
"max": 0.12698474526405334,
"mean": -0.0022345406468957663,
"std": 0.033433251082897186,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41459208726882935,
"max": 0.6603645086288452,
"mean": -1.977803731278982e-05,
"std": 0.03910015523433685,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.243562698364258,
"max": 4.728666305541992,
"mean": -0.020446542650461197,
"std": 1.0085786581039429,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24519944190979004,
"max": 0.2077825665473938,
"mean": 4.388581874081865e-05,
"std": 0.033966176211833954,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.034593358635902405,
"max": 0.04485077038407326,
"mean": -1.7529440810903907e-05,
"std": 0.012629235163331032,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20095357298851013,
"max": 0.20613527297973633,
"mean": -2.959615085273981e-05,
"std": 0.03102371282875538,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.20019184052944183,
"max": 0.11357004940509796,
"mean": -0.0029205437749624252,
"std": 0.034529101103544235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36704930663108826,
"max": 1.058448076248169,
"mean": 0.6707465052604675,
"std": 0.0665469765663147,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.3986629843711853,
"max": 0.5028019547462463,
"mean": -3.858951822621748e-05,
"std": 0.04113718494772911,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12819068133831024,
"max": 0.026764869689941406,
"mean": -0.03055746480822563,
"std": 0.021891731768846512,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44944334030151367,
"max": 0.43338072299957275,
"mean": 8.373618766199797e-05,
"std": 0.03489609435200691,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2679402530193329,
"max": 0.07267966121435165,
"mean": -0.0011121004354208708,
"std": 0.023136794567108154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2874027192592621,
"max": 0.6862822771072388,
"mean": 0.5247019529342651,
"std": 0.047706179320812225,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22247114777565002,
"max": 0.2237931489944458,
"mean": 1.5673409507144243e-05,
"std": 0.03895280137658119,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13664273917675018,
"max": 0.10935632139444351,
"mean": 0.00023680762387812138,
"std": 0.029263831675052643,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37552782893180847,
"max": 0.43765556812286377,
"mean": -9.529509043204598e-06,
"std": 0.0392889641225338,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8507211208343506,
"max": 5.005820274353027,
"mean": 0.00975782610476017,
"std": 0.8459950685501099,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.2234737128019333,
"max": 0.22026528418064117,
"mean": -2.2568747226614505e-07,
"std": 0.03441343083977699,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.043700210750103,
"max": 0.0358847938477993,
"mean": -0.0002585579641163349,
"std": 0.012083812616765499,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21352721750736237,
"max": 0.1891147494316101,
"mean": -1.673133192525711e-05,
"std": 0.031540192663669586,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18098995089530945,
"max": 0.12096531689167023,
"mean": -0.0024120290763676167,
"std": 0.04128490760922432,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4226054847240448,
"max": 0.9433368444442749,
"mean": 0.6629081964492798,
"std": 0.056974004954099655,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37151336669921875,
"max": 0.4759024977684021,
"mean": -8.223902113968506e-05,
"std": 0.040896181017160416,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20840628445148468,
"max": 0.02712824009358883,
"mean": -0.030254749581217766,
"std": 0.02136547490954399,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3405216336250305,
"max": 0.7342746257781982,
"mean": 8.478653035126626e-05,
"std": 0.03477146103978157,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2405085265636444,
"max": 0.05050582066178322,
"mean": -0.0011980931740254164,
"std": 0.02047325111925602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.305998831987381,
"max": 0.6545577049255371,
"mean": 0.525275707244873,
"std": 0.0462840236723423,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30443698167800903,
"max": 0.2175063043832779,
"mean": 6.991640839260072e-05,
"std": 0.03949848935008049,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.1496177613735199,
"max": 0.1315852701663971,
"mean": 0.00034793667146004736,
"std": 0.030498284846544266,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25779959559440613,
"max": 0.2024526447057724,
"mean": 3.095036663580686e-05,
"std": 0.039487626403570175,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3393359184265137,
"max": 2.3790037631988525,
"mean": -0.02626325562596321,
"std": 0.4501512348651886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.189274862408638,
"max": 0.2107497602701187,
"mean": 3.7229168810881674e-05,
"std": 0.03479816019535065,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03176194056868553,
"max": 0.035539623349905014,
"mean": -0.00020054224296472967,
"std": 0.012292396277189255,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18866902589797974,
"max": 0.17066700756549835,
"mean": -6.797373498557135e-05,
"std": 0.032174721360206604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13956007361412048,
"max": 0.13746821880340576,
"mean": -0.0025175614282488823,
"std": 0.0513296015560627,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4674248695373535,
"max": 0.957923948764801,
"mean": 0.6691091656684875,
"std": 0.052978649735450745,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32444727420806885,
"max": 0.3098219633102417,
"mean": -1.5040723155834712e-06,
"std": 0.040952056646347046,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12495888024568558,
"max": 0.025304077193140984,
"mean": -0.03072468377649784,
"std": 0.019833404570817947,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44051459431648254,
"max": 0.44567734003067017,
"mean": 9.530649549560621e-05,
"std": 0.03512415289878845,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.2248658984899521,
"max": 0.05171418562531471,
"mean": -0.0011846581473946571,
"std": 0.018478091806173325,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33937862515449524,
"max": 0.7403524518013,
"mean": 0.5588580369949341,
"std": 0.041548021137714386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27266961336135864,
"max": 0.2785436511039734,
"mean": 1.9886707377736457e-05,
"std": 0.041062381118535995,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13720278441905975,
"max": 0.1400555521249771,
"mean": 0.0004891848657280207,
"std": 0.026654429733753204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4912598729133606,
"max": 0.3564285337924957,
"mean": 8.880282985046506e-05,
"std": 0.040700383484363556,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.3000996112823486,
"max": 1.7473976612091064,
"mean": -0.021102074533700943,
"std": 0.5005303025245667,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21771195530891418,
"max": 0.19800876080989838,
"mean": -4.054907913086936e-05,
"std": 0.03423738107085228,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04137791320681572,
"max": 0.03871942684054375,
"mean": -0.00014505225408356637,
"std": 0.012883453629910946,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.1777161806821823,
"max": 0.1839223951101303,
"mean": 4.761077434523031e-05,
"std": 0.03156030550599098,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1801164597272873,
"max": 0.18409180641174316,
"mean": -0.002218745881691575,
"std": 0.05486130341887474,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742484390735626,
"max": 1.027018666267395,
"mean": 0.6454694271087646,
"std": 0.050571199506521225,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27197960019111633,
"max": 0.3094431757926941,
"mean": 0.00011241070023970678,
"std": 0.0406884104013443,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10593951493501663,
"max": 0.026867138221859932,
"mean": -0.02952626720070839,
"std": 0.0179454255849123,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.339232474565506,
"max": 0.32961946725845337,
"mean": 5.7173179811798036e-05,
"std": 0.03441809490323067,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.1818968504667282,
"max": 0.04209613800048828,
"mean": -0.001073765684850514,
"std": 0.017224203795194626,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3253825902938843,
"max": 0.6876205801963806,
"mean": 0.5113766193389893,
"std": 0.03712678700685501,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2340041846036911,
"max": 0.22588428854942322,
"mean": -3.603727600420825e-05,
"std": 0.03918161243200302,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11547420918941498,
"max": 0.13177312910556793,
"mean": 0.00015100545715540648,
"std": 0.029211556538939476,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.353280246257782,
"max": 0.28580334782600403,
"mean": 7.311312401725445e-06,
"std": 0.03925010561943054,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.137877941131592,
"max": 3.5483016967773438,
"mean": -0.011621923185884953,
"std": 0.6833143830299377,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21149367094039917,
"max": 0.20919673144817352,
"mean": 3.474394543445669e-05,
"std": 0.034489404410123825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.0357508510351181,
"max": 0.048132169991731644,
"mean": 0.0007945147808641195,
"std": 0.012859269045293331,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21085014939308167,
"max": 0.19338075816631317,
"mean": -1.279619482374983e-06,
"std": 0.03169989585876465,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18688012659549713,
"max": 0.17741110920906067,
"mean": -0.0028487846720963717,
"std": 0.05866115912795067,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4747392237186432,
"max": 1.0433117151260376,
"mean": 0.6515810489654541,
"std": 0.04988763853907585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.2485654354095459,
"max": 0.32921651005744934,
"mean": 0.00018060754518955946,
"std": 0.04057681933045387,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12461096793413162,
"max": 0.024597609415650368,
"mean": -0.030512426048517227,
"std": 0.017616724595427513,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42169103026390076,
"max": 0.4825250208377838,
"mean": 2.1487815047294134e-06,
"std": 0.03540307283401489,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15202857553958893,
"max": 0.04342101141810417,
"mean": 3.956547880079597e-05,
"std": 0.014885293319821358,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.315530002117157,
"max": 0.6829717755317688,
"mean": 0.5530707240104675,
"std": 0.04085434973239899,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20659124851226807,
"max": 0.2201390564441681,
"mean": 3.096506407018751e-05,
"std": 0.03830333426594734,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.1380155086517334,
"max": 0.11290067434310913,
"mean": 2.059592225123197e-05,
"std": 0.025836361572146416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40320274233818054,
"max": 0.37160059809684753,
"mean": 2.6222376618534327e-05,
"std": 0.03818517550826073,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.775665044784546,
"max": 2.872361421585083,
"mean": 0.0011700298637151718,
"std": 0.5173272490501404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.2030554711818695,
"max": 0.19753621518611908,
"mean": 2.9474727853084914e-05,
"std": 0.03430046886205673,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05103779584169388,
"max": 0.04008523374795914,
"mean": -0.000419780844822526,
"std": 0.013429902493953705,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19672255218029022,
"max": 0.20196260511875153,
"mean": -1.2339524801063817e-05,
"std": 0.03180818632245064,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19336175918579102,
"max": 0.19535411894321442,
"mean": -0.0029691390227526426,
"std": 0.06259549409151077,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34919390082359314,
"max": 1.0855821371078491,
"mean": 0.6673611998558044,
"std": 0.055458005517721176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22532346844673157,
"max": 0.2517567276954651,
"mean": 0.0003590356500353664,
"std": 0.04076584428548813,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09113647788763046,
"max": 0.04372163116931915,
"mean": -0.030099857598543167,
"std": 0.01762346550822258,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.3537713587284088,
"max": 0.3043927252292633,
"mean": -4.351784446043894e-05,
"std": 0.03712814301252365,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.1622427999973297,
"max": 0.0636076033115387,
"mean": -8.386171248275787e-05,
"std": 0.019415445625782013,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34875378012657166,
"max": 0.7230772972106934,
"mean": 0.542546272277832,
"std": 0.03922481834888458,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21956898272037506,
"max": 0.22326983511447906,
"mean": -1.1109572369605303e-05,
"std": 0.03923607990145683,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11858610808849335,
"max": 0.1710456758737564,
"mean": 0.00028452256810851395,
"std": 0.025138530880212784,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24716253578662872,
"max": 0.30147185921669006,
"mean": -3.647191624622792e-05,
"std": 0.03893563523888588,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5094945430755615,
"max": 3.7191741466522217,
"mean": 0.015858110040426254,
"std": 0.7832505702972412,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21879123151302338,
"max": 0.2377484291791916,
"mean": -1.353577317786403e-05,
"std": 0.03630785644054413,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04725177586078644,
"max": 0.05147033557295799,
"mean": 0.00048084836453199387,
"std": 0.01352026965469122,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21421915292739868,
"max": 0.21782870590686798,
"mean": 5.651723040500656e-05,
"std": 0.03361982852220535,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.2116560935974121,
"max": 0.23178474605083466,
"mean": -0.005108034238219261,
"std": 0.06190710514783859,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.3619433343410492,
"max": 1.1028457880020142,
"mean": 0.6994728446006775,
"std": 0.05383099243044853,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.2347707897424698,
"max": 0.24507476389408112,
"mean": 0.00046346502494998276,
"std": 0.041274722665548325,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.098201684653759,
"max": 0.06837960332632065,
"mean": -0.031449105590581894,
"std": 0.01813678629696369,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3019881546497345,
"max": 0.351855993270874,
"mean": -8.162805897882208e-05,
"std": 0.040280550718307495,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1525154411792755,
"max": 0.14985136687755585,
"mean": 0.0002546610194258392,
"std": 0.02304759994149208,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9988279342651367,
"max": 1.0030174255371094,
"mean": 1.0003814697265625,
"std": 0.0010646688751876354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03128192201256752,
"max": 0.031278640031814575,
"mean": -1.9287415852886625e-05,
"std": 0.01804400235414505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031218387186527252,
"max": 0.03101835958659649,
"mean": -0.0010843591298907995,
"std": 0.01795342192053795,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031292207539081573,
"max": 0.03128044679760933,
"mean": 3.544726496329531e-06,
"std": 0.018044408410787582,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031148849055171013,
"max": 0.031187163665890694,
"mean": 0.000333936681272462,
"std": 0.01806570589542389,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9988681674003601,
"max": 1.0030490159988403,
"mean": 1.0004115104675293,
"std": 0.0010549556463956833,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.031293854117393494,
"max": 0.03129155561327934,
"mean": -8.391638402827084e-06,
"std": 0.018043123185634613,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.031248562037944794,
"max": 0.03123636171221733,
"mean": 0.00015367052401416004,
"std": 0.017994463443756104,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.382835328578949,
"max": 0.7205657362937927,
"mean": 0.5808628797531128,
"std": 0.03902854025363922,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23823925852775574,
"max": 0.1967414915561676,
"mean": 2.6552535928203724e-05,
"std": 0.03746962919831276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11881034076213837,
"max": 0.16626670956611633,
"mean": 0.000991516513749957,
"std": 0.027575215324759483,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24632981419563293,
"max": 0.5012024641036987,
"mean": -5.04429881402757e-05,
"std": 0.03762752190232277,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.946824312210083,
"max": 3.773773670196533,
"mean": -0.0035694693215191364,
"std": 0.6819667816162109,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22745896875858307,
"max": 0.2515793740749359,
"mean": -1.1545061170181725e-05,
"std": 0.03743903711438179,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07179750502109528,
"max": 0.0807880237698555,
"mean": -0.0005204002372920513,
"std": 0.015668606385588646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22822564840316772,
"max": 0.25826144218444824,
"mean": -2.862494147848338e-05,
"std": 0.03542570024728775,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.2006409764289856,
"max": 0.21548894047737122,
"mean": -0.005540885496884584,
"std": 0.06836719810962677,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40525123476982117,
"max": 1.1910948753356934,
"mean": 0.7381879091262817,
"std": 0.05550322309136391,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2213674634695053,
"max": 0.2461645007133484,
"mean": 0.0005210727686062455,
"std": 0.04134247452020645,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10357673466205597,
"max": 0.02419574372470379,
"mean": -0.03268023580312729,
"std": 0.01890200562775135,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.44974827766418457,
"max": 0.42273736000061035,
"mean": -0.00043248123256489635,
"std": 0.046903859823942184,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2517695128917694,
"max": 0.4706769287586212,
"mean": 0.003199656493961811,
"std": 0.04457153007388115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3170992434024811,
"max": 0.333298921585083,
"mean": -2.5289473342127167e-05,
"std": 0.021290816366672516,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32478415966033936,
"max": 0.6863877177238464,
"mean": 0.5711605548858643,
"std": 0.04484730586409569,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.1647796630859375,
"max": 0.17416934669017792,
"mean": -4.8634105041855946e-05,
"std": 0.03318461403250694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.1870798021554947,
"max": 0.14308109879493713,
"mean": 3.898901923093945e-05,
"std": 0.02971462905406952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38088855147361755,
"max": 0.2463647872209549,
"mean": -9.938010407495312e-06,
"std": 0.03276585787534714,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6601390838623047,
"max": 3.2940189838409424,
"mean": -0.01424746960401535,
"std": 0.9857901930809021,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.2351982444524765,
"max": 0.24773260951042175,
"mean": -1.7793041479308158e-05,
"std": 0.04170281067490578,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07287801802158356,
"max": 0.15471716225147247,
"mean": 0.0006660239887423813,
"std": 0.025180837139487267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.26665613055229187,
"max": 0.24858269095420837,
"mean": -1.5366244042525068e-05,
"std": 0.04014318436384201,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18983444571495056,
"max": 0.1949683576822281,
"mean": -0.0012304731644690037,
"std": 0.06671547889709473,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32925084233283997,
"max": 1.0009599924087524,
"mean": 0.7193903923034668,
"std": 0.052590519189834595,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23175209760665894,
"max": 0.24594298005104065,
"mean": 0.00018278483184985816,
"std": 0.04090619832277298,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11433617770671844,
"max": 0.018662281334400177,
"mean": -0.04249466210603714,
"std": 0.01887579821050167,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3903564512729645,
"max": 0.4076610803604126,
"mean": -2.190250415878836e-05,
"std": 0.04854064807295799,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.694047212600708,
"max": 0.413125216960907,
"mean": 0.000851891003549099,
"std": 0.06033211946487427,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": 0.0,
"max": 0.9999971389770508,
"mean": 0.0004882798530161381,
"std": 0.022091632708907127,
"sparsity": 0.99951171875,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9987401366233826,
"max": 1.0030049085617065,
"mean": 1.0003970861434937,
"std": 0.0010890224948525429,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03128720819950104,
"max": 0.03127748519182205,
"mean": -2.1021871361881495e-05,
"std": 0.018035341054201126,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031208951026201248,
"max": 0.0312366746366024,
"mean": -0.0006772055057808757,
"std": 0.01782999187707901,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03128505125641823,
"max": 0.0312827005982399,
"mean": -8.840423106448725e-06,
"std": 0.01803436689078808,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031223762780427933,
"max": 0.031257808208465576,
"mean": -0.0007298105047084391,
"std": 0.017944179475307465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9988026022911072,
"max": 1.0031852722167969,
"mean": 1.0003986358642578,
"std": 0.0010702211875468493,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03128661960363388,
"max": 0.03128815069794655,
"mean": 3.5941102396463975e-06,
"std": 0.01804072968661785,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03123682737350464,
"max": 0.03124977834522724,
"mean": 0.00019563926616683602,
"std": 0.018076641485095024,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.2346186488866806,
"max": 0.27259576320648193,
"mean": 6.985836080275476e-06,
"std": 0.01881217770278454,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.3213435411453247,
"max": 0.6945998072624207,
"mean": 0.5817909240722656,
"std": 0.04608319699764252,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18191689252853394,
"max": 0.19781433045864105,
"mean": -1.1746024938474875e-05,
"std": 0.03318719565868378,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16086804866790771,
"max": 0.1296302229166031,
"mean": -0.0010684699518606067,
"std": 0.034163739532232285,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33239439129829407,
"max": 0.31163647770881653,
"mean": -1.0337707863072865e-05,
"std": 0.03223792091012001,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.812414169311523,
"max": 8.773359298706055,
"mean": 0.09355923533439636,
"std": 1.6210812330245972,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23393671214580536,
"max": 0.24211150407791138,
"mean": 4.141662793699652e-05,
"std": 0.04086197167634964,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07609452307224274,
"max": 0.06586258113384247,
"mean": 0.00047865102533251047,
"std": 0.01942458190023899,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24614335596561432,
"max": 0.23432280123233795,
"mean": -2.907749149017036e-06,
"std": 0.03943663462996483,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16305704414844513,
"max": 0.1610053926706314,
"mean": 0.0016310829669237137,
"std": 0.06529799103736877,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5568323135375977,
"max": 0.9453117847442627,
"mean": 0.7130987644195557,
"std": 0.040391918271780014,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.2288832664489746,
"max": 0.25533148646354675,
"mean": -4.5479209802579135e-05,
"std": 0.04058132693171501,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13495926558971405,
"max": 0.022289777174592018,
"mean": -0.0413689985871315,
"std": 0.018403179943561554,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4220907390117645,
"max": 0.3925161063671112,
"mean": -4.4413791329134256e-06,
"std": 0.04779106378555298,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6081769466400146,
"max": 0.652148425579071,
"mean": 0.001585810212418437,
"std": 0.05687166377902031,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2517467141151428,
"max": 0.32074928283691406,
"mean": -6.074779776099604e-06,
"std": 0.019615592435002327,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.36013174057006836,
"max": 0.6833459138870239,
"mean": 0.570884644985199,
"std": 0.04308824613690376,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22070643305778503,
"max": 0.17717598378658295,
"mean": -3.468842260190286e-05,
"std": 0.03430233895778656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16383720934391022,
"max": 0.23332805931568146,
"mean": 0.0003637468325905502,
"std": 0.032890770584344864,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26396337151527405,
"max": 0.2400342971086502,
"mean": -5.2375002269400284e-05,
"std": 0.03390149027109146,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.860640525817871,
"max": 5.097131252288818,
"mean": 0.04391013830900192,
"std": 1.2302772998809814,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24682015180587769,
"max": 0.25062263011932373,
"mean": 7.221732084872201e-05,
"std": 0.043993160128593445,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06271186470985413,
"max": 0.05459222570061684,
"mean": 0.0006507715443149209,
"std": 0.017198268324136734,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2868800759315491,
"max": 0.271938681602478,
"mean": -4.989939043298364e-05,
"std": 0.04299154132604599,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16084273159503937,
"max": 0.1707206517457962,
"mean": -0.002884692046791315,
"std": 0.059305742383003235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.51964271068573,
"max": 0.9341827630996704,
"mean": 0.7137263417243958,
"std": 0.038649603724479675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23825131356716156,
"max": 0.24959467351436615,
"mean": 0.00046492042019963264,
"std": 0.04046143591403961,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14443093538284302,
"max": 0.04144603759050369,
"mean": -0.039705902338027954,
"std": 0.020563002675771713,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5333583354949951,
"max": 0.5836927890777588,
"mean": 5.9018666433985345e-06,
"std": 0.048868328332901,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5197700262069702,
"max": 0.4940829873085022,
"mean": 0.0023609776981174946,
"std": 0.05347929149866104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27364596724510193,
"max": 0.3152502179145813,
"mean": 1.8441196516505443e-06,
"std": 0.02005275897681713,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36628827452659607,
"max": 0.7126691937446594,
"mean": 0.5933467149734497,
"std": 0.046086061745882034,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21118636429309845,
"max": 0.19975997507572174,
"mean": 3.079167436226271e-05,
"std": 0.0348685048520565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18748052418231964,
"max": 0.2042539119720459,
"mean": 0.000956728239543736,
"std": 0.03154991194605827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28994736075401306,
"max": 0.3401152789592743,
"mean": -4.7362642362713814e-05,
"std": 0.03458964452147484,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.881758451461792,
"max": 3.3913075923919678,
"mean": 0.014463461004197598,
"std": 0.8590267896652222,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22456875443458557,
"max": 0.2500464916229248,
"mean": -3.998348802269902e-06,
"std": 0.042235810309648514,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05513551086187363,
"max": 0.046896424144506454,
"mean": -1.89729908015579e-05,
"std": 0.01585385575890541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2930184602737427,
"max": 0.2910744249820709,
"mean": -7.35160028852988e-06,
"std": 0.041950810700654984,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12501806020736694,
"max": 0.2597162425518036,
"mean": -0.003234931267797947,
"std": 0.05317143350839615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4562249779701233,
"max": 0.8457176685333252,
"mean": 0.705817699432373,
"std": 0.035453151911497116,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5114459991455078,
"max": 0.3485345244407654,
"mean": 0.0003425978356972337,
"std": 0.04020640254020691,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.1872977465391159,
"max": 0.039509162306785583,
"mean": -0.03940243646502495,
"std": 0.02136845327913761,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5449100136756897,
"max": 0.5570695400238037,
"mean": -7.181215914897621e-05,
"std": 0.05074289068579674,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5124268531799316,
"max": 0.6651233434677124,
"mean": 0.002447479637339711,
"std": 0.04955451935529709,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33246591687202454,
"max": 0.2658751308917999,
"mean": 3.69829467672389e-06,
"std": 0.019390346482396126,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3222673833370209,
"max": 0.7674033641815186,
"mean": 0.6512042284011841,
"std": 0.04545491561293602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2496984899044037,
"max": 0.21969059109687805,
"mean": -2.5450863176956773e-06,
"std": 0.03650245815515518,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32755619287490845,
"max": 0.28763604164123535,
"mean": -0.0006797901587560773,
"std": 0.03858839347958565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3103632628917694,
"max": 0.3702820837497711,
"mean": 6.481494347099215e-05,
"std": 0.03624306991696358,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.7229533195495605,
"max": 5.8144097328186035,
"mean": 0.03798435255885124,
"std": 1.4144145250320435,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.2220195233821869,
"max": 0.20613467693328857,
"mean": -7.503894448745996e-05,
"std": 0.04249141365289688,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07768063247203827,
"max": 0.051408518105745316,
"mean": -0.0009253580356016755,
"std": 0.01641588658094406,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3309858441352844,
"max": 0.3291884660720825,
"mean": -4.9612558541412e-06,
"std": 0.04279816150665283,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2853319048881531,
"max": 0.11173354089260101,
"mean": -0.001206716988235712,
"std": 0.04702756926417351,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48654904961586,
"max": 0.88804692029953,
"mean": 0.7376827001571655,
"std": 0.03842971473932266,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3613007962703705,
"max": 0.27439025044441223,
"mean": 5.118318586028181e-05,
"std": 0.04065314307808876,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2479037493467331,
"max": 0.046517688781023026,
"mean": -0.039281267672777176,
"std": 0.023276478052139282,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6269151568412781,
"max": 0.5976049900054932,
"mean": -6.191668217070401e-05,
"std": 0.053125977516174316,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7105785608291626,
"max": 0.26612961292266846,
"mean": 0.0009194647427648306,
"std": 0.051263753324747086,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433726131916046,
"max": 0.3034554719924927,
"mean": 2.0521497390291188e-07,
"std": 0.019139625132083893,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3501395285129547,
"max": 0.783959686756134,
"mean": 0.6390355825424194,
"std": 0.049371764063835144,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20602361857891083,
"max": 0.20698852837085724,
"mean": -5.9928101109107956e-05,
"std": 0.037698496133089066,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25897642970085144,
"max": 0.268706738948822,
"mean": -0.00040520128095522523,
"std": 0.044660814106464386,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.35453060269355774,
"max": 0.3229123651981354,
"mean": -7.312092748179566e-06,
"std": 0.03720676898956299,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.267762184143066,
"max": 4.20961332321167,
"mean": -0.026448804885149002,
"std": 1.0076419115066528,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23904970288276672,
"max": 0.24397821724414825,
"mean": -2.552817386458628e-05,
"std": 0.04321575164794922,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06249221786856651,
"max": 0.05668818950653076,
"mean": 0.0003517880686558783,
"std": 0.01415390707552433,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43751028180122375,
"max": 0.3737626075744629,
"mean": 1.4619375178881455e-05,
"std": 0.04412780702114105,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.0962304174900055,
"max": 0.1764947772026062,
"mean": -0.0006597821484319866,
"std": 0.03515012562274933,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4218544661998749,
"max": 1.0707522630691528,
"mean": 0.7486886978149414,
"std": 0.04222184792160988,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2660444378852844,
"max": 0.2971097230911255,
"mean": -7.88940378697589e-05,
"std": 0.04081380367279053,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18505463004112244,
"max": 0.04312760382890701,
"mean": -0.03682396560907364,
"std": 0.025607850402593613,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4577805697917938,
"max": 0.48729538917541504,
"mean": 4.396865551825613e-05,
"std": 0.05422099307179451,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2866191267967224,
"max": 0.5523927807807922,
"mean": -0.0008822673698887229,
"std": 0.04786074161529541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29266098141670227,
"max": 0.3227379322052002,
"mean": 6.034013495082036e-06,
"std": 0.01997271552681923,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2912082076072693,
"max": 0.7611724734306335,
"mean": 0.6509549617767334,
"std": 0.05223819240927696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2437622845172882,
"max": 0.2617740035057068,
"mean": -5.626710844808258e-06,
"std": 0.03961407393217087,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2678508758544922,
"max": 0.20037643611431122,
"mean": -0.0008778825285844505,
"std": 0.051807109266519547,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2725064158439636,
"max": 0.2540656328201294,
"mean": 5.306316325004445e-06,
"std": 0.03871078044176102,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.980466842651367,
"max": 15.965588569641113,
"mean": 0.03327019512653351,
"std": 1.9910999536514282,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20688198506832123,
"max": 0.22597242891788483,
"mean": -7.254729280248284e-05,
"std": 0.04055875167250633,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.069511778652668,
"max": 0.06321422755718231,
"mean": 0.00015925483603496104,
"std": 0.01475309394299984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46553534269332886,
"max": 0.32018300890922546,
"mean": 1.9559764041332528e-05,
"std": 0.040594302117824554,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06415701657533646,
"max": 0.11569144576787949,
"mean": 0.0011994449887424707,
"std": 0.024716829881072044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37491846084594727,
"max": 0.9332267045974731,
"mean": 0.7511833310127258,
"std": 0.04030444473028183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2793797552585602,
"max": 0.2735174894332886,
"mean": -0.00016838237934280187,
"std": 0.04100488871335983,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19866259396076202,
"max": 0.05138175189495087,
"mean": -0.03203893452882767,
"std": 0.025100193917751312,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6591871976852417,
"max": 0.5361859798431396,
"mean": -5.0474118324927986e-05,
"std": 0.0528571642935276,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19288860261440277,
"max": 0.582888662815094,
"mean": -0.0005087298923172057,
"std": 0.0411086231470108,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41760918498039246,
"max": 0.3719828724861145,
"mean": 6.52037670079153e-06,
"std": 0.02162792719900608,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21464084088802338,
"max": 0.7477675080299377,
"mean": 0.6495819687843323,
"std": 0.054441265761852264,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20966503024101257,
"max": 0.1956944614648819,
"mean": 4.008584801340476e-05,
"std": 0.039459552615880966,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32997503876686096,
"max": 0.25995907187461853,
"mean": -0.0032368863467127085,
"std": 0.05632346495985985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20606832206249237,
"max": 0.2548881471157074,
"mean": 5.397828499553725e-05,
"std": 0.03856222704052925,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.250948905944824,
"max": 6.940567493438721,
"mean": 0.048394568264484406,
"std": 1.3862435817718506,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20990008115768433,
"max": 0.23062950372695923,
"mean": -4.797322617378086e-06,
"std": 0.04131775721907616,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.043879762291908264,
"max": 0.03602854162454605,
"mean": -6.735368515364826e-06,
"std": 0.012802576646208763,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3975800573825836,
"max": 0.3450191617012024,
"mean": -5.543587758438662e-05,
"std": 0.04239463433623314,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055230122059583664,
"max": 0.06288789957761765,
"mean": 0.00035758066223934293,
"std": 0.018682915717363358,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35092663764953613,
"max": 1.0465692281723022,
"mean": 0.7897400856018066,
"std": 0.04884057492017746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.33373889327049255,
"max": 0.3863142132759094,
"mean": -0.00016909500118345022,
"std": 0.04149040952324867,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15769430994987488,
"max": 0.059132885187864304,
"mean": -0.03183465823531151,
"std": 0.025120330974459648,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6965411305427551,
"max": 0.46967917680740356,
"mean": -8.504216384608299e-05,
"std": 0.05180637910962105,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24813847243785858,
"max": 0.3292423188686371,
"mean": -0.00026213712408207357,
"std": 0.041475165635347366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2870347499847412,
"max": 0.3504159152507782,
"mean": -2.7635057904262794e-06,
"std": 0.024241114035248756,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.1968143880367279,
"max": 0.7801634073257446,
"mean": 0.67032390832901,
"std": 0.058765437453985214,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22936369478702545,
"max": 0.23155838251113892,
"mean": -2.0868072169832885e-05,
"std": 0.0404399111866951,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22028712928295135,
"max": 0.2412400096654892,
"mean": 0.0007798401638865471,
"std": 0.05588255077600479,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21693190932273865,
"max": 0.2265695184469223,
"mean": -7.217879465315491e-05,
"std": 0.039374105632305145,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.916163444519043,
"max": 9.079217910766602,
"mean": -0.0012825923040509224,
"std": 1.8500556945800781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.269673228263855,
"max": 0.2592774033546448,
"mean": 4.366856592241675e-05,
"std": 0.038410674780607224,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05804191157221794,
"max": 0.05804998800158501,
"mean": 0.0003545111685525626,
"std": 0.014721807092428207,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2641296982765198,
"max": 0.2882002294063568,
"mean": -6.158516043797135e-05,
"std": 0.039077457040548325,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.044157613068819046,
"max": 0.03739722818136215,
"mean": -9.842761210165918e-05,
"std": 0.013352800160646439,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3394981026649475,
"max": 1.0940546989440918,
"mean": 0.8640274405479431,
"std": 0.06395779550075531,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42318135499954224,
"max": 0.41912782192230225,
"mean": 0.0003136250888928771,
"std": 0.04351290315389633,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.215034618973732,
"max": 0.17091527581214905,
"mean": -0.02945549227297306,
"std": 0.031898606568574905,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5991082191467285,
"max": 0.5603575706481934,
"mean": -0.0001479926722822711,
"std": 0.05346138775348663,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17912201583385468,
"max": 0.3778008818626404,
"mean": 0.0013520645443350077,
"std": 0.037332892417907715,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3943796157836914,
"max": 0.3688676655292511,
"mean": 3.761224070331082e-05,
"std": 0.028617393225431442,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2908812463283539,
"max": 0.8286238312721252,
"mean": 0.7055914402008057,
"std": 0.06791043281555176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9266071319580078,
"max": 1.0270264148712158,
"mean": -2.7955527912126854e-05,
"std": 0.0476437471807003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8803294897079468,
"max": 0.8167775273323059,
"mean": -0.0002962773141916841,
"std": 0.09563106298446655,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27031898498535156,
"max": 0.24110636115074158,
"mean": -2.252469494123943e-05,
"std": 0.03894982486963272,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.770000457763672,
"max": 22.87746810913086,
"mean": -0.09194529056549072,
"std": 4.074869632720947,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22796331346035004,
"max": 0.2458551675081253,
"mean": -2.5422079488635063e-05,
"std": 0.038641415536403656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.060239437967538834,
"max": 0.045478228479623795,
"mean": -0.00013640533143188804,
"std": 0.01469514612108469,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33809611201286316,
"max": 0.3752952516078949,
"mean": 7.530758921348024e-06,
"std": 0.040820345282554626,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04625128582119942,
"max": 0.1955953687429428,
"mean": 0.0002734389272518456,
"std": 0.013558450154960155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37381020188331604,
"max": 1.1318634748458862,
"mean": 0.8903213143348694,
"std": 0.0641312375664711,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.447549432516098,
"max": 0.5427570939064026,
"mean": 2.5110648493864574e-05,
"std": 0.04558061435818672,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22403661906719208,
"max": 0.08747347444295883,
"mean": -0.03202786669135094,
"std": 0.037772756069898605,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7269205451011658,
"max": 0.6894555687904358,
"mean": 3.6393928894540295e-05,
"std": 0.05179436132311821,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1745767593383789,
"max": 0.21847710013389587,
"mean": 3.5673321690410376e-05,
"std": 0.03179144486784935,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.339706152677536,
"max": 0.37326323986053467,
"mean": 4.3032145185861737e-05,
"std": 0.03413531556725502,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3174583911895752,
"max": 1.2890191078186035,
"mean": 0.601619303226471,
"std": 0.08366930484771729,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2832256853580475,
"max": 0.26046571135520935,
"mean": -2.993364205394755e-06,
"std": 0.03598063439130783,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.2360483556985855,
"max": 0.20603413879871368,
"mean": 0.00023948654416017234,
"std": 0.05606625974178314,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4355963468551636,
"max": 0.32496193051338196,
"mean": 2.4223818400059827e-05,
"std": 0.034124087542295456,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.552776336669922,
"max": 7.322168350219727,
"mean": -0.00738462433218956,
"std": 0.7001185417175293,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34443002939224243,
"max": 0.3632832467556,
"mean": 0.00010313428356312215,
"std": 0.047836337238550186,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07385826855897903,
"max": 0.06043381989002228,
"mean": 0.0009369200561195612,
"std": 0.014941117726266384,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2565152943134308,
"max": 0.28712597489356995,
"mean": 4.846529918722808e-06,
"std": 0.041564520448446274,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05538159981369972,
"max": 0.06288077682256699,
"mean": 0.00012733059702441096,
"std": 0.007154808379709721,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49408578872680664,
"max": 1.2223646640777588,
"mean": 1.013702154159546,
"std": 0.11764581501483917,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0940581560134888,
"max": 1.0475841760635376,
"mean": -4.863579306402244e-05,
"std": 0.0524178184568882,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22388966381549835,
"max": 0.1732550710439682,
"mean": -0.027240199968218803,
"std": 0.03634064644575119,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8849446177482605,
"max": 0.9234321713447571,
"mean": -0.0001459874474676326,
"std": 0.05329861491918564,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17124590277671814,
"max": 0.38005468249320984,
"mean": 0.0033688729163259268,
"std": 0.03990017995238304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7780460119247437,
"max": 0.722984254360199,
"mean": 1.8001555872615427e-05,
"std": 0.046154171228408813,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33841073513031006,
"max": 1.4301798343658447,
"mean": 0.9487167596817017,
"std": 0.20710234344005585,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7458388805389404,
"max": 1.704530119895935,
"mean": 0.000226972799282521,
"std": 0.15870548784732819,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2008311748504639,
"max": 1.1021909713745117,
"mean": -0.009556617587804794,
"std": 0.20411409437656403,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4210456311702728,
"max": 0.4282980263233185,
"mean": 6.39081554254517e-05,
"std": 0.04802015796303749,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.769929885864258,
"max": 19.564817428588867,
"mean": -0.24858255684375763,
"std": 4.782279968261719,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3241115212440491,
"max": 0.43888670206069946,
"mean": -1.1728005119948648e-05,
"std": 0.04616701602935791,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03380877524614334,
"max": 0.036888398230075836,
"mean": 0.0006396375247277319,
"std": 0.012913818471133709,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7038182020187378,
"max": 0.6691953539848328,
"mean": 4.2681567720137537e-05,
"std": 0.05789203941822052,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07242082059383392,
"max": 0.06784311681985855,
"mean": -0.000134931382490322,
"std": 0.01290101557970047,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3802341818809509,
"max": 1.39493727684021,
"mean": 1.0668972730636597,
"std": 0.21994373202323914,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6166523694992065,
"max": 0.7187345623970032,
"mean": 0.0001129009760916233,
"std": 0.0580277256667614,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21905651688575745,
"max": 0.22523820400238037,
"mean": 0.006192180328071117,
"std": 0.049731798470020294,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6298801898956299,
"max": 0.8897404074668884,
"mean": 1.237633296113927e-05,
"std": 0.023545268923044205,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5074089765548706,
"max": 0.4742584228515625,
"mean": -0.0030243899673223495,
"std": 0.06931118667125702,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5381640791893005,
"max": 1.182090163230896,
"mean": 0.7830706238746643,
"std": 0.09912356734275818,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2673421800136566,
"max": 0.21319416165351868,
"mean": -0.0002236190193798393,
"std": 0.05400572717189789,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23863200843334198,
"max": 0.014863962307572365,
"mean": -0.04393288493156433,
"std": 0.03432033956050873,
"sparsity": 0.0,
"shape": [
100
]
}
}
}