zombieda0 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
8b027e1 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43045347929000854,
"max": 0.2989708483219147,
"mean": -0.002559528686106205,
"std": 0.042551685124635696,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06319475919008255,
"max": 0.10763752460479736,
"mean": 0.0005878363735973835,
"std": 0.0341116227209568,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4125802516937256,
"max": 0.8362879157066345,
"mean": -0.00021037086844444275,
"std": 0.024107296019792557,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11548256129026413,
"max": 0.3214675784111023,
"mean": -0.0009404525626450777,
"std": 0.01957694999873638,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7917673587799072,
"max": 2.87048602104187,
"mean": -0.000364800012903288,
"std": 0.6153724193572998,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2789378762245178,
"max": 0.38190650939941406,
"mean": 0.00042029444011859596,
"std": 0.04275033250451088,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22229844331741333,
"max": 0.20966938138008118,
"mean": -0.004494193941354752,
"std": 0.04090972617268562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42792314291000366,
"max": 0.4753040671348572,
"mean": 2.5448428004892776e-06,
"std": 0.02450907975435257,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.3254566490650177,
"max": 0.15697774291038513,
"mean": -0.046701110899448395,
"std": 0.05157899484038353,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41040217876434326,
"max": 0.3545200824737549,
"mean": -0.00012632929428946227,
"std": 0.023601176217198372,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22976312041282654,
"max": 0.26262250542640686,
"mean": -0.029148582369089127,
"std": 0.049347616732120514,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25461843609809875,
"max": 0.8200721740722656,
"mean": 0.5254405736923218,
"std": 0.08080819994211197,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2969217598438263,
"max": 0.2653011679649353,
"mean": -0.00042407598812133074,
"std": 0.03210418298840523,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09274514019489288,
"max": 0.12481185793876648,
"mean": 0.0006486732745543122,
"std": 0.025742683559656143,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29045799374580383,
"max": 0.28142276406288147,
"mean": -7.696857210248709e-05,
"std": 0.03093627467751503,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.8994598388671875,
"max": 5.814236164093018,
"mean": -0.009332070127129555,
"std": 1.2954570055007935,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4248283803462982,
"max": 0.3437764346599579,
"mean": 9.760602551978081e-05,
"std": 0.029952971264719963,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028973544016480446,
"max": 0.027646001428365707,
"mean": -0.000311461859382689,
"std": 0.01257230993360281,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45393574237823486,
"max": 0.4486967921257019,
"mean": 2.2734935555490665e-05,
"std": 0.023855067789554596,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08869241178035736,
"max": 0.09115342795848846,
"mean": 0.0022729213815182447,
"std": 0.019511748105287552,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26661837100982666,
"max": 1.0562738180160522,
"mean": 0.5311292409896851,
"std": 0.10441415756940842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5744583010673523,
"max": 0.6083983182907104,
"mean": -0.0004310230724513531,
"std": 0.03859498351812363,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18180975317955017,
"max": 0.04576439782977104,
"mean": -0.029441392049193382,
"std": 0.0425901859998703,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1666754484176636,
"max": 1.6346206665039062,
"mean": 0.00031845836201682687,
"std": 0.027693821117281914,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16254130005836487,
"max": 0.20572608709335327,
"mean": -0.021116478368639946,
"std": 0.02794043906033039,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22449950873851776,
"max": 0.8436615467071533,
"mean": 0.48752841353416443,
"std": 0.07519911974668503,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25530511140823364,
"max": 0.30584144592285156,
"mean": -9.390279956278391e-06,
"std": 0.03347048908472061,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09549093246459961,
"max": 0.1104247123003006,
"mean": 5.642877658829093e-05,
"std": 0.02698560617864132,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29746732115745544,
"max": 0.29597631096839905,
"mean": 5.020098251407035e-05,
"std": 0.03253835067152977,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.164289474487305,
"max": 5.084513187408447,
"mean": -0.014594512060284615,
"std": 1.157379150390625,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34489384293556213,
"max": 0.34349551796913147,
"mean": 7.88411489338614e-05,
"std": 0.030058156698942184,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03615685552358627,
"max": 0.033247072249650955,
"mean": -0.0001437932369299233,
"std": 0.0130230151116848,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31528598070144653,
"max": 0.3752017617225647,
"mean": -2.1658630430465564e-05,
"std": 0.02405543439090252,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10527704656124115,
"max": 0.12188438326120377,
"mean": -0.001954131992533803,
"std": 0.028842832893133163,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3118007183074951,
"max": 1.1209547519683838,
"mean": 0.6662399172782898,
"std": 0.09774922579526901,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8724845051765442,
"max": 0.6275652050971985,
"mean": 0.0016756996046751738,
"std": 0.04743832349777222,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2710001766681671,
"max": 0.034087300300598145,
"mean": -0.04660267010331154,
"std": 0.040595393627882004,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9202765226364136,
"max": 0.964392364025116,
"mean": 0.0010208573658019304,
"std": 0.040701836347579956,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14455123245716095,
"max": 0.07482488453388214,
"mean": -0.009084297344088554,
"std": 0.025694943964481354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.2397618293762207,
"max": 0.7124034762382507,
"mean": 0.4472024440765381,
"std": 0.0593235045671463,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2730177044868469,
"max": 0.29747670888900757,
"mean": 8.653647455503233e-06,
"std": 0.03547436371445656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11902837455272675,
"max": 0.1184682548046112,
"mean": 0.0007503863889724016,
"std": 0.027607794851064682,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28101518750190735,
"max": 0.27942103147506714,
"mean": -7.649646431673318e-05,
"std": 0.03510240092873573,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.509594440460205,
"max": 2.5215904712677,
"mean": 0.026745371520519257,
"std": 0.5867790579795837,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2210964858531952,
"max": 0.2716039717197418,
"mean": 2.442306140437722e-06,
"std": 0.030731501057744026,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03315361589193344,
"max": 0.031151030212640762,
"mean": 0.00011695168359437957,
"std": 0.012393992394208908,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23539169132709503,
"max": 0.23184844851493835,
"mean": 5.725533628719859e-05,
"std": 0.025697585195302963,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13603144884109497,
"max": 0.12801550328731537,
"mean": -0.005497873295098543,
"std": 0.039962731301784515,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35472768545150757,
"max": 1.1723560094833374,
"mean": 0.7105388641357422,
"std": 0.10377441346645355,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6173874735832214,
"max": 0.5556294322013855,
"mean": 0.0011603377060964704,
"std": 0.04611397534608841,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18947651982307434,
"max": 0.024928653612732887,
"mean": -0.03484659641981125,
"std": 0.028622934594750404,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1309525966644287,
"max": 0.9703920483589172,
"mean": 0.0003591428976505995,
"std": 0.04234250634908676,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.59785395860672,
"max": 0.0627356544137001,
"mean": -0.004881600849330425,
"std": 0.028621168807148933,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3753371834754944,
"max": 0.9404803514480591,
"mean": 0.5924646854400635,
"std": 0.06694936007261276,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3917739689350128,
"max": 0.36935487389564514,
"mean": 7.001077028689906e-05,
"std": 0.03718659654259682,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11900685727596283,
"max": 0.1365460306406021,
"mean": 0.0009158444590866566,
"std": 0.029187751933932304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6190850138664246,
"max": 0.5087974667549133,
"mean": 1.5220098248391878e-05,
"std": 0.036439333111047745,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.186792373657227,
"max": 8.788902282714844,
"mean": -0.10927547514438629,
"std": 1.698854923248291,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2765258252620697,
"max": 0.23972086608409882,
"mean": 5.2279683586675674e-05,
"std": 0.03261309862136841,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.051504429429769516,
"max": 0.0394677110016346,
"mean": 9.376452362630516e-05,
"std": 0.012969755567610264,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23076090216636658,
"max": 0.23486877977848053,
"mean": -2.2034959329175763e-05,
"std": 0.02938973717391491,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20423753559589386,
"max": 0.10524258017539978,
"mean": -0.004020648077130318,
"std": 0.03263989835977554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3396383821964264,
"max": 1.0124459266662598,
"mean": 0.7007039785385132,
"std": 0.09675922244787216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5647669434547424,
"max": 0.8336009979248047,
"mean": 0.00041507231071591377,
"std": 0.042294517159461975,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21213513612747192,
"max": 0.029952630400657654,
"mean": -0.03217371925711632,
"std": 0.026498902589082718,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7548895478248596,
"max": 0.7191285490989685,
"mean": -1.5825342416064814e-05,
"std": 0.03683512657880783,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26342180371284485,
"max": 0.106303870677948,
"mean": -0.0030142769683152437,
"std": 0.028873054310679436,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2839457094669342,
"max": 0.695040225982666,
"mean": 0.4993869960308075,
"std": 0.04653431475162506,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27824723720550537,
"max": 0.23382486402988434,
"mean": -0.00011091126361861825,
"std": 0.03875747323036194,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.1535824090242386,
"max": 0.12643294036388397,
"mean": -0.0022276602685451508,
"std": 0.03332621976733208,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.4143897294998169,
"max": 0.6594640016555786,
"mean": -1.8512728274799883e-05,
"std": 0.03909672051668167,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.237905979156494,
"max": 4.722469329833984,
"mean": -0.020456835627555847,
"std": 1.0076903104782104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24504739046096802,
"max": 0.2075919508934021,
"mean": 4.4300948502495885e-05,
"std": 0.033962640911340714,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03446675091981888,
"max": 0.04485952481627464,
"mean": -2.2283929865807295e-05,
"std": 0.01263953372836113,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20111070573329926,
"max": 0.2064419686794281,
"mean": -2.9351647754083388e-05,
"std": 0.031020889058709145,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.1998225450515747,
"max": 0.11318594217300415,
"mean": -0.002895027631893754,
"std": 0.034535519778728485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3667519986629486,
"max": 1.0576496124267578,
"mean": 0.6704938411712646,
"std": 0.06640732288360596,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.3984535038471222,
"max": 0.5021195411682129,
"mean": -3.873988316627219e-05,
"std": 0.04113014414906502,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12865276634693146,
"max": 0.02695303224027157,
"mean": -0.0305329579859972,
"std": 0.021882230415940285,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44948050379753113,
"max": 0.43325671553611755,
"mean": 7.534700125688687e-05,
"std": 0.03489053621888161,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26749807596206665,
"max": 0.07307979464530945,
"mean": -0.0010903773363679647,
"std": 0.023135719820857048,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28755003213882446,
"max": 0.6852815747261047,
"mean": 0.5245311260223389,
"std": 0.047535065561532974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22266581654548645,
"max": 0.22331343591213226,
"mean": 1.5911335140117444e-05,
"std": 0.038949206471443176,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13634715974330902,
"max": 0.10933983325958252,
"mean": 0.00024775456404313445,
"std": 0.02920820191502571,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.37493303418159485,
"max": 0.43759024143218994,
"mean": -9.405484888702631e-06,
"std": 0.03928741440176964,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8458516597747803,
"max": 4.99931526184082,
"mean": 0.0097417663782835,
"std": 0.8452187180519104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22269685566425323,
"max": 0.22029872238636017,
"mean": -3.309251042082906e-07,
"std": 0.03441028296947479,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.043786074966192245,
"max": 0.03593028709292412,
"mean": -0.0002595169935375452,
"std": 0.012078601866960526,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.2127062827348709,
"max": 0.18842767179012299,
"mean": -1.7018646758515388e-05,
"std": 0.03153670206665993,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18093673884868622,
"max": 0.12075397372245789,
"mean": -0.0023954270873218775,
"std": 0.0412798747420311,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4229053258895874,
"max": 0.9417746663093567,
"mean": 0.6626519560813904,
"std": 0.05681704729795456,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3708723485469818,
"max": 0.4765413999557495,
"mean": -8.208492363337427e-05,
"std": 0.040889330208301544,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.2084851861000061,
"max": 0.02737521566450596,
"mean": -0.03023434244096279,
"std": 0.021364057436585426,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3406664729118347,
"max": 0.7341601848602295,
"mean": 8.241336036007851e-05,
"std": 0.03476617485284805,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.24016013741493225,
"max": 0.05046252906322479,
"mean": -0.0011865145061165094,
"std": 0.02045980468392372,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3058834671974182,
"max": 0.6534616947174072,
"mean": 0.5251225829124451,
"std": 0.04612237960100174,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3043194115161896,
"max": 0.2172033190727234,
"mean": 6.997850869083777e-05,
"std": 0.039497096091508865,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14911659061908722,
"max": 0.1309829205274582,
"mean": 0.00032657815609127283,
"std": 0.030455630272626877,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2569442689418793,
"max": 0.2018917053937912,
"mean": 3.1276180379791185e-05,
"std": 0.039488255977630615,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3362653255462646,
"max": 2.3758890628814697,
"mean": -0.026241008192300797,
"std": 0.44977059960365295,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18858182430267334,
"max": 0.21028441190719604,
"mean": 3.710644523380324e-05,
"std": 0.034793708473443985,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03167951852083206,
"max": 0.03567720949649811,
"mean": -0.0001978189975488931,
"std": 0.012288851663470268,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.1882917732000351,
"max": 0.1702534258365631,
"mean": -6.83729158481583e-05,
"std": 0.03217038884758949,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13942022621631622,
"max": 0.1372338831424713,
"mean": -0.0025149777065962553,
"std": 0.05129906162619591,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670557677745819,
"max": 0.9555894136428833,
"mean": 0.668860912322998,
"std": 0.052772559225559235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32439276576042175,
"max": 0.30925771594047546,
"mean": -1.0448575267218985e-06,
"std": 0.04094531387090683,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12486255913972855,
"max": 0.025668619200587273,
"mean": -0.030689772218465805,
"std": 0.019822947680950165,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.4394906163215637,
"max": 0.4453367292881012,
"mean": 9.582463098922744e-05,
"std": 0.03511909395456314,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22461967170238495,
"max": 0.051830437034368515,
"mean": -0.0011815722100436687,
"std": 0.018466372042894363,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3391477167606354,
"max": 0.739862322807312,
"mean": 0.558701753616333,
"std": 0.04139617085456848,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27299270033836365,
"max": 0.27884820103645325,
"mean": 2.0352346837171353e-05,
"std": 0.04105763137340546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13679315149784088,
"max": 0.13977941870689392,
"mean": 0.0004920524079352617,
"std": 0.026632016524672508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4905266761779785,
"max": 0.35576674342155457,
"mean": 8.910118776839226e-05,
"std": 0.04069532826542854,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.297072649002075,
"max": 1.7451610565185547,
"mean": -0.02107967808842659,
"std": 0.5001281499862671,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21811611950397491,
"max": 0.19743309915065765,
"mean": -4.0164730307878926e-05,
"std": 0.034233368933200836,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04114250838756561,
"max": 0.03886367008090019,
"mean": -0.0001361201866529882,
"std": 0.01288355328142643,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17762865126132965,
"max": 0.1828955113887787,
"mean": 4.802473995368928e-05,
"std": 0.031556740403175354,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17992889881134033,
"max": 0.18389376997947693,
"mean": -0.002214584732428193,
"std": 0.054829709231853485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4741877317428589,
"max": 1.025841474533081,
"mean": 0.6452314257621765,
"std": 0.050352681428194046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27164527773857117,
"max": 0.30913278460502625,
"mean": 0.00011245411587879062,
"std": 0.04068151116371155,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10526741296052933,
"max": 0.0267398189753294,
"mean": -0.029518909752368927,
"std": 0.017934836447238922,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33933401107788086,
"max": 0.3291725814342499,
"mean": 5.2628944104071707e-05,
"std": 0.034412726759910583,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18180128931999207,
"max": 0.04250966012477875,
"mean": -0.0010595148196443915,
"std": 0.017209524288773537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3251764476299286,
"max": 0.686564564704895,
"mean": 0.5111627578735352,
"std": 0.03695236146450043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23392871022224426,
"max": 0.22538572549819946,
"mean": -3.6134006222710013e-05,
"std": 0.03917535021901131,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11511560529470444,
"max": 0.13181880116462708,
"mean": 0.0001504624669905752,
"std": 0.029160819947719574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3522934317588806,
"max": 0.28486883640289307,
"mean": 6.553360890393378e-06,
"std": 0.03924445062875748,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.132338523864746,
"max": 3.5437686443328857,
"mean": -0.011590493842959404,
"std": 0.6826138496398926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21074621379375458,
"max": 0.20937031507492065,
"mean": 3.468795330263674e-05,
"std": 0.03448443114757538,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03586054965853691,
"max": 0.04796382784843445,
"mean": 0.0007884950027801096,
"std": 0.012871338985860348,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21027511358261108,
"max": 0.1930612176656723,
"mean": -9.818363650992978e-07,
"std": 0.03169528394937515,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18642070889472961,
"max": 0.1772109568119049,
"mean": -0.0028416060376912355,
"std": 0.058615587651729584,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746147096157074,
"max": 1.0414643287658691,
"mean": 0.6513273119926453,
"std": 0.04965711012482643,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24834445118904114,
"max": 0.3291000425815582,
"mean": 0.00018075655680149794,
"std": 0.04056985676288605,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12541179358959198,
"max": 0.02496136911213398,
"mean": -0.030498577281832695,
"std": 0.017614111304283142,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42039719223976135,
"max": 0.48143431544303894,
"mean": 1.1528718459885567e-06,
"std": 0.03539694473147392,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15133655071258545,
"max": 0.04343574121594429,
"mean": 4.278856431483291e-05,
"std": 0.014885962940752506,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3155560791492462,
"max": 0.6816220879554749,
"mean": 0.5528930425643921,
"std": 0.04069439694285393,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20635411143302917,
"max": 0.21984520554542542,
"mean": 3.190069764968939e-05,
"std": 0.038299400359392166,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13771876692771912,
"max": 0.1125807911157608,
"mean": 2.632014366099611e-05,
"std": 0.025809206068515778,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4028305411338806,
"max": 0.3708246946334839,
"mean": 2.552652767917607e-05,
"std": 0.03817948326468468,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.770878791809082,
"max": 2.8686978816986084,
"mean": 0.001155341975390911,
"std": 0.5168278217315674,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.2037316858768463,
"max": 0.1975933313369751,
"mean": 2.9730301321251318e-05,
"std": 0.03429727256298065,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05053940787911415,
"max": 0.039879124611616135,
"mean": -0.00042120314901694655,
"std": 0.013415130786597729,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19603155553340912,
"max": 0.20171792805194855,
"mean": -1.2456664080673363e-05,
"std": 0.0318053737282753,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19293668866157532,
"max": 0.19509124755859375,
"mean": -0.0029669972136616707,
"std": 0.06252549588680267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.348905086517334,
"max": 1.0837733745574951,
"mean": 0.6670998334884644,
"std": 0.05524366348981857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22561387717723846,
"max": 0.25142621994018555,
"mean": 0.00035854580346494913,
"std": 0.04075940325856209,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09107953310012817,
"max": 0.04363439604640007,
"mean": -0.030079854652285576,
"std": 0.017611680552363396,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.353360116481781,
"max": 0.30395275354385376,
"mean": -4.4715885451296344e-05,
"std": 0.03712251037359238,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16167744994163513,
"max": 0.06346611678600311,
"mean": -7.887817628215998e-05,
"std": 0.019426995888352394,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.3487050533294678,
"max": 0.7219327092170715,
"mean": 0.5423474907875061,
"std": 0.0390637181699276,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21929427981376648,
"max": 0.22339415550231934,
"mean": -1.152800177806057e-05,
"std": 0.039230845868587494,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11837491393089294,
"max": 0.17054983973503113,
"mean": 0.0002821336966007948,
"std": 0.025116898119449615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24647969007492065,
"max": 0.3006535768508911,
"mean": -3.7006771890446544e-05,
"std": 0.038930293172597885,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.505005121231079,
"max": 3.7144510746002197,
"mean": 0.01584703102707863,
"std": 0.782384991645813,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21911682188510895,
"max": 0.237393319606781,
"mean": -1.3131610103300773e-05,
"std": 0.03630334511399269,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04719853773713112,
"max": 0.051371362060308456,
"mean": 0.00048090319614857435,
"std": 0.013523470610380173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21416644752025604,
"max": 0.21722018718719482,
"mean": 5.635957859340124e-05,
"std": 0.033615801483392715,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.2113313376903534,
"max": 0.2312089204788208,
"mean": -0.005099226720631123,
"std": 0.06185970827937126,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.3619273602962494,
"max": 1.1010714769363403,
"mean": 0.699254035949707,
"std": 0.053593844175338745,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23512133955955505,
"max": 0.24475844204425812,
"mean": 0.00046337698586285114,
"std": 0.04126880317926407,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09808072447776794,
"max": 0.06809643656015396,
"mean": -0.03143021836876869,
"std": 0.01812811754643917,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3017187714576721,
"max": 0.3516466021537781,
"mean": -8.262180926976725e-05,
"std": 0.040274444967508316,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15225963294506073,
"max": 0.149653360247612,
"mean": 0.00026317729498259723,
"std": 0.023038743063807487,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9992579817771912,
"max": 1.0015391111373901,
"mean": 1.0000743865966797,
"std": 0.0006371568888425827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03125908225774765,
"max": 0.0312553308904171,
"mean": -1.9290733689558692e-05,
"std": 0.01804095134139061,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.03122831881046295,
"max": 0.030987922102212906,
"mean": -0.001084161689504981,
"std": 0.017950566485524178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031255997717380524,
"max": 0.031259775161743164,
"mean": 3.548155291355215e-06,
"std": 0.01804135926067829,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031154237687587738,
"max": 0.03117459826171398,
"mean": 0.0003339198010507971,
"std": 0.018062766641378403,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.000624487001914531,
"max": 0.0007099520298652351,
"mean": 4.385071406431962e-06,
"std": 0.00018961619934998453,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.99758380651474,
"max": 1.0029877424240112,
"mean": 0.9999918341636658,
"std": 0.0008515770896337926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.0335661917924881,
"max": 0.03370394930243492,
"mean": -6.065281013434287e-06,
"std": 0.018047738820314407,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03307846933603287,
"max": 0.033399470150470734,
"mean": -0.00018566125072538853,
"std": 0.017954055219888687,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.001481670537032187,
"max": 0.001570003922097385,
"mean": 1.885646042865119e-06,
"std": 0.0002906274457927793,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005839330260641873,
"max": 0.0007720313151367009,
"mean": 7.4740901254699565e-06,
"std": 0.00017145519086625427,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3833076059818268,
"max": 0.7191433310508728,
"mean": 0.5806823968887329,
"std": 0.03885458782315254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23893095552921295,
"max": 0.19658136367797852,
"mean": 2.6083449483849108e-05,
"std": 0.03746617212891579,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11882374435663223,
"max": 0.16677531599998474,
"mean": 0.0009812903590500355,
"std": 0.027557166293263435,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24655194580554962,
"max": 0.49992480874061584,
"mean": -5.045527359470725e-05,
"std": 0.0376235656440258,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.941847801208496,
"max": 3.7689895629882812,
"mean": -0.0035720239393413067,
"std": 0.6813404560089111,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22746945917606354,
"max": 0.25183355808258057,
"mean": -1.1859048754558899e-05,
"std": 0.037434790283441544,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07158222794532776,
"max": 0.08058217912912369,
"mean": -0.0005094742518849671,
"std": 0.01565464586019516,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22813726961612701,
"max": 0.2576807737350464,
"mean": -2.8760241548297927e-05,
"std": 0.03542162850499153,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20052045583724976,
"max": 0.21483510732650757,
"mean": -0.005527016241103411,
"std": 0.06832844763994217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40501996874809265,
"max": 1.1893715858459473,
"mean": 0.7378885746002197,
"std": 0.055228959769010544,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22087574005126953,
"max": 0.2456100732088089,
"mean": 0.0005211896495893598,
"std": 0.04133577644824982,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.1032254695892334,
"max": 0.024186622351408005,
"mean": -0.03266698122024536,
"std": 0.018890688195824623,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.44966718554496765,
"max": 0.4224751591682434,
"mean": -0.00043509487295523286,
"std": 0.04689602553844452,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2515262961387634,
"max": 0.47013524174690247,
"mean": 0.0032045203261077404,
"std": 0.04452691972255707,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.31688347458839417,
"max": 0.33314356207847595,
"mean": -2.516225868021138e-05,
"std": 0.021287811920046806,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32447901368141174,
"max": 0.6856404542922974,
"mean": 0.5710100531578064,
"std": 0.04470637068152428,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.1645602136850357,
"max": 0.17448709905147552,
"mean": -4.871720739174634e-05,
"std": 0.033182382583618164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.1869296431541443,
"max": 0.14326152205467224,
"mean": 3.4562835935503244e-05,
"std": 0.029701465740799904,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3810564875602722,
"max": 0.24595260620117188,
"mean": -9.857794793788344e-06,
"std": 0.032763585448265076,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6554818153381348,
"max": 3.289768695831299,
"mean": -0.014251366257667542,
"std": 0.9850791096687317,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.2347586303949356,
"max": 0.24735252559185028,
"mean": -1.8151138647226617e-05,
"std": 0.041698191314935684,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07252755761146545,
"max": 0.154456228017807,
"mean": 0.0006656115292571485,
"std": 0.025164911523461342,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2663172781467438,
"max": 0.24813731014728546,
"mean": -1.5164550859481096e-05,
"std": 0.04013926163315773,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18959401547908783,
"max": 0.19463232159614563,
"mean": -0.0012374802026897669,
"std": 0.06668464839458466,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32920119166374207,
"max": 0.99962317943573,
"mean": 0.7191556692123413,
"std": 0.052332110702991486,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.2317013144493103,
"max": 0.24530917406082153,
"mean": 0.00018264415848534554,
"std": 0.04090017080307007,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11453195661306381,
"max": 0.01904553547501564,
"mean": -0.04247689247131348,
"std": 0.01886470802128315,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.38964197039604187,
"max": 0.4074561595916748,
"mean": -2.184425829909742e-05,
"std": 0.048533279448747635,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.692954957485199,
"max": 0.41268306970596313,
"mean": 0.0008480865508317947,
"std": 0.060282234102487564,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0014125935267657042,
"max": 1.0007404088974,
"mean": 0.00048819734365679324,
"std": 0.02208949252963066,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9992830157279968,
"max": 1.0015500783920288,
"mean": 1.0000728368759155,
"std": 0.0006243661628104746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03125380352139473,
"max": 0.03125770390033722,
"mean": -2.1020379790570587e-05,
"std": 0.018032291904091835,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031215310096740723,
"max": 0.031232142820954323,
"mean": -0.0006769997999072075,
"std": 0.017826829105615616,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03125686198472977,
"max": 0.03126228600740433,
"mean": -8.83147367858328e-06,
"std": 0.018031319603323936,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031232059001922607,
"max": 0.031244752928614616,
"mean": -0.0007297524134628475,
"std": 0.017941756173968315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0005121154244989157,
"max": 0.000419745163526386,
"mean": -3.856697276205523e-06,
"std": 0.00015613996947649866,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9973074197769165,
"max": 1.0023618936538696,
"mean": 0.9995496869087219,
"std": 0.0008333163568750024,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03326094523072243,
"max": 0.03284362331032753,
"mean": -2.9510356398532167e-06,
"std": 0.018027810379862785,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03245115652680397,
"max": 0.03129417076706886,
"mean": -0.0005187825299799442,
"std": 0.018035637214779854,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.001710034441202879,
"max": 0.001517186756245792,
"mean": -1.1187451036676066e-06,
"std": 0.00028821235173381865,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0004745775950141251,
"max": 0.00038665023748762906,
"mean": -3.4791635243891506e-06,
"std": 0.00014281406765803695,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23430979251861572,
"max": 0.27249982953071594,
"mean": 6.625029982387787e-06,
"std": 0.018810328096151352,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32144901156425476,
"max": 0.6939529180526733,
"mean": 0.5816143751144409,
"std": 0.04593788832426071,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18192411959171295,
"max": 0.19777271151542664,
"mean": -1.1577552868402563e-05,
"std": 0.03318414464592934,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16048845648765564,
"max": 0.12929441034793854,
"mean": -0.0010730556678026915,
"std": 0.03413493558764458,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3323971629142761,
"max": 0.31116729974746704,
"mean": -1.0262037903885357e-05,
"std": 0.032234691083431244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.802551746368408,
"max": 8.761726379394531,
"mean": 0.0934542790055275,
"std": 1.6194651126861572,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23397472500801086,
"max": 0.24182309210300446,
"mean": 4.162585537414998e-05,
"std": 0.040856119245290756,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07595551013946533,
"max": 0.06575819849967957,
"mean": 0.00048204767517745495,
"std": 0.019416553899645805,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24591538310050964,
"max": 0.23388886451721191,
"mean": -3.2548523449804634e-06,
"std": 0.039430882781744,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16298307478427887,
"max": 0.16088849306106567,
"mean": 0.0016233095666393638,
"std": 0.06529011577367783,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5571500658988953,
"max": 0.9436134696006775,
"mean": 0.7128155827522278,
"std": 0.0401235930621624,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22800666093826294,
"max": 0.2548002004623413,
"mean": -4.557950160233304e-05,
"std": 0.0405743233859539,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13472457230091095,
"max": 0.022118322551250458,
"mean": -0.04135219752788544,
"std": 0.01838735118508339,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42162197828292847,
"max": 0.39239510893821716,
"mean": -4.3281570469844155e-06,
"std": 0.0477834977209568,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6071848273277283,
"max": 0.6512866020202637,
"mean": 0.0015846553724259138,
"std": 0.05683678016066551,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25181835889816284,
"max": 0.32083579897880554,
"mean": -6.167530045786407e-06,
"std": 0.01961352303624153,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35955187678337097,
"max": 0.6821539998054504,
"mean": 0.5706835389137268,
"std": 0.04298859089612961,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22016161680221558,
"max": 0.17701253294944763,
"mean": -3.445023321546614e-05,
"std": 0.03429866582155228,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16314493119716644,
"max": 0.23276831209659576,
"mean": 0.000363295606803149,
"std": 0.032813575118780136,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26391661167144775,
"max": 0.23982854187488556,
"mean": -5.2968603995395824e-05,
"std": 0.03389734774827957,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.854297161102295,
"max": 5.090524673461914,
"mean": 0.04387897625565529,
"std": 1.229095458984375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24643158912658691,
"max": 0.2503342926502228,
"mean": 7.21608375897631e-05,
"std": 0.04398628696799278,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06249094381928444,
"max": 0.05441959202289581,
"mean": 0.0006457456620410085,
"std": 0.017188476398587227,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.28642886877059937,
"max": 0.2721048593521118,
"mean": -5.0093196477973834e-05,
"std": 0.04298442229628563,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16100700199604034,
"max": 0.1703459769487381,
"mean": -0.002886796835809946,
"std": 0.05929969996213913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5198073983192444,
"max": 0.9330060482025146,
"mean": 0.7133970260620117,
"std": 0.03842265531420708,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.2378769814968109,
"max": 0.2487393021583557,
"mean": 0.00046459035365842283,
"std": 0.04045308753848076,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.1450035721063614,
"max": 0.0410858653485775,
"mean": -0.03969570994377136,
"std": 0.020541729405522346,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5323667526245117,
"max": 0.5824663043022156,
"mean": 5.913888344366569e-06,
"std": 0.048858821392059326,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5189786553382874,
"max": 0.49333813786506653,
"mean": 0.0023667975328862667,
"std": 0.0534440316259861,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2737034261226654,
"max": 0.31558021903038025,
"mean": 1.935112777573522e-06,
"std": 0.02005006931722164,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36589479446411133,
"max": 0.7117040157318115,
"mean": 0.5931321382522583,
"std": 0.0459616482257843,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21081827580928802,
"max": 0.19904154539108276,
"mean": 3.062835457967594e-05,
"std": 0.03486720845103264,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18713217973709106,
"max": 0.20344023406505585,
"mean": 0.000952105619944632,
"std": 0.031497493386268616,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28968340158462524,
"max": 0.33981209993362427,
"mean": -4.6875291445758194e-05,
"std": 0.03458764776587486,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.876854181289673,
"max": 3.3869495391845703,
"mean": 0.014455719850957394,
"std": 0.8583089709281921,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22449138760566711,
"max": 0.2498161643743515,
"mean": -3.885651949531166e-06,
"std": 0.04222925379872322,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05526581034064293,
"max": 0.04652895778417587,
"mean": -2.1849831682629883e-05,
"std": 0.015840303152799606,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2932976484298706,
"max": 0.29035061597824097,
"mean": -7.6227315730648115e-06,
"std": 0.041944343596696854,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12483495473861694,
"max": 0.2589971721172333,
"mean": -0.003243764629587531,
"std": 0.05317297205328941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45624497532844543,
"max": 0.8444257378578186,
"mean": 0.705470621585846,
"std": 0.03522758185863495,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5120490789413452,
"max": 0.3481951355934143,
"mean": 0.00034297475940547884,
"std": 0.040198490023612976,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18573546409606934,
"max": 0.03953690081834793,
"mean": -0.03938683122396469,
"std": 0.021360911428928375,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5439703464508057,
"max": 0.5556368231773376,
"mean": -7.127778371796012e-05,
"std": 0.05073383450508118,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5116579532623291,
"max": 0.6641839742660522,
"mean": 0.0024420106783509254,
"std": 0.04951965808868408,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33250588178634644,
"max": 0.2653454840183258,
"mean": 3.314120021968847e-06,
"std": 0.019387103617191315,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32199233770370483,
"max": 0.7664577960968018,
"mean": 0.6510406136512756,
"std": 0.04532792791724205,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24981507658958435,
"max": 0.21987095475196838,
"mean": -1.8786176951834932e-06,
"std": 0.03650160878896713,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32696181535720825,
"max": 0.286738783121109,
"mean": -0.0006850577774457633,
"std": 0.038556959480047226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.310026079416275,
"max": 0.3700660765171051,
"mean": 6.51663140160963e-05,
"std": 0.03624221682548523,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.716763496398926,
"max": 5.807004928588867,
"mean": 0.03795414790511131,
"std": 1.4130035638809204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22150921821594238,
"max": 0.20585696399211884,
"mean": -7.512117736041546e-05,
"std": 0.0424848347902298,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07765647768974304,
"max": 0.05150295048952103,
"mean": -0.0009257810888811946,
"std": 0.01641261577606201,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3305484354496002,
"max": 0.3292558491230011,
"mean": -4.674302545026876e-06,
"std": 0.042791128158569336,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2847578823566437,
"max": 0.11202681809663773,
"mean": -0.0012038333807140589,
"std": 0.04701409116387367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4860534965991974,
"max": 0.8868187069892883,
"mean": 0.7373650074005127,
"std": 0.03824280574917793,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3623279929161072,
"max": 0.2745623290538788,
"mean": 5.109083213028498e-05,
"std": 0.04064391553401947,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24753618240356445,
"max": 0.046382758766412735,
"mean": -0.039263010025024414,
"std": 0.023289302363991737,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6261420249938965,
"max": 0.5965140461921692,
"mean": -5.986806354485452e-05,
"std": 0.05311597138643265,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7094455361366272,
"max": 0.2657928168773651,
"mean": 0.0009170880075544119,
"std": 0.05122483894228935,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433697819709778,
"max": 0.30368947982788086,
"mean": 2.3889015210443176e-07,
"std": 0.019135670736432076,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3497507870197296,
"max": 0.7829343676567078,
"mean": 0.638809323310852,
"std": 0.04924893379211426,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20543725788593292,
"max": 0.20679403841495514,
"mean": -5.990585486870259e-05,
"std": 0.037696100771427155,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25862252712249756,
"max": 0.26803287863731384,
"mean": -0.00040157014154829085,
"std": 0.04459596797823906,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3540649712085724,
"max": 0.32237085700035095,
"mean": -6.968005436647218e-06,
"std": 0.03720472380518913,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.260965824127197,
"max": 4.203993797302246,
"mean": -0.026412349194288254,
"std": 1.006641149520874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23861756920814514,
"max": 0.24335098266601562,
"mean": -2.5078054022742435e-05,
"std": 0.043209534138441086,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06233251839876175,
"max": 0.056672900915145874,
"mean": 0.00034255694481544197,
"std": 0.014151446521282196,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4369187653064728,
"max": 0.373432457447052,
"mean": 1.4437458048632834e-05,
"std": 0.044120825827121735,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09643160551786423,
"max": 0.1759035885334015,
"mean": -0.0006591043202206492,
"std": 0.035157084465026855,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4216560423374176,
"max": 1.0694262981414795,
"mean": 0.7483175992965698,
"std": 0.04205932468175888,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2665790617465973,
"max": 0.29692915081977844,
"mean": -7.955127512104809e-05,
"std": 0.04080403223633766,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18574701249599457,
"max": 0.043912798166275024,
"mean": -0.03681863471865654,
"std": 0.025608953088521957,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.45691967010498047,
"max": 0.486579954624176,
"mean": 4.3823405576404184e-05,
"std": 0.05420882627367973,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.28651049733161926,
"max": 0.5512732267379761,
"mean": -0.0008804658427834511,
"std": 0.04782622680068016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2928602397441864,
"max": 0.3227991461753845,
"mean": 6.5394251578254625e-06,
"std": 0.019969874992966652,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2909410297870636,
"max": 0.7601505517959595,
"mean": 0.6508233547210693,
"std": 0.05213586986064911,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2434667944908142,
"max": 0.2616351246833801,
"mean": -6.0445322560553905e-06,
"std": 0.039612967520952225,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2675444483757019,
"max": 0.1998518854379654,
"mean": -0.0008808361599221826,
"std": 0.05175328254699707,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2721408009529114,
"max": 0.2537347078323364,
"mean": 4.015575541416183e-06,
"std": 0.03871006891131401,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.963685989379883,
"max": 15.945606231689453,
"mean": 0.033225029706954956,
"std": 1.9889812469482422,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20711851119995117,
"max": 0.22583316266536713,
"mean": -7.227503374451771e-05,
"std": 0.04055361449718475,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06934971362352371,
"max": 0.06323137879371643,
"mean": 0.00015275523765012622,
"std": 0.014742234721779823,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4650252163410187,
"max": 0.3206908702850342,
"mean": 1.950068872247357e-05,
"std": 0.04058856889605522,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06406640261411667,
"max": 0.11521138995885849,
"mean": 0.0011922243284061551,
"std": 0.02470523677766323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3746338486671448,
"max": 0.9322722554206848,
"mean": 0.7508488893508911,
"std": 0.040187884122133255,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2793060839176178,
"max": 0.2731705904006958,
"mean": -0.00016857523587532341,
"std": 0.04099458083510399,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.198820099234581,
"max": 0.05085344612598419,
"mean": -0.03202417492866516,
"std": 0.025111379101872444,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6573337912559509,
"max": 0.5352881550788879,
"mean": -4.8675712605472654e-05,
"std": 0.05284544453024864,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19310522079467773,
"max": 0.5820621848106384,
"mean": -0.000515035935677588,
"std": 0.04106917232275009,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.4177176356315613,
"max": 0.37193918228149414,
"mean": 6.035062597220531e-06,
"std": 0.02162161096930504,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21426703035831451,
"max": 0.7471129894256592,
"mean": 0.649559497833252,
"std": 0.05437251552939415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20954684913158417,
"max": 0.19578267633914948,
"mean": 4.0035050915321335e-05,
"std": 0.03946496546268463,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3292764723300934,
"max": 0.2593560516834259,
"mean": -0.0032243705354630947,
"std": 0.056255340576171875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20562483370304108,
"max": 0.2547135651111603,
"mean": 5.434878767118789e-05,
"std": 0.038567062467336655,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.242823600769043,
"max": 6.931674957275391,
"mean": 0.04833440110087395,
"std": 1.384947657585144,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20960542559623718,
"max": 0.2301599383354187,
"mean": -5.232992862147512e-06,
"std": 0.04131288081407547,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04387415945529938,
"max": 0.03594405576586723,
"mean": 4.847475793212652e-06,
"std": 0.012800573371350765,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3978384733200073,
"max": 0.34482401609420776,
"mean": -5.554188828682527e-05,
"std": 0.04238930344581604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05505719780921936,
"max": 0.06286165118217468,
"mean": 0.00037010322557762265,
"std": 0.018672354519367218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3501102924346924,
"max": 1.0451011657714844,
"mean": 0.7893368601799011,
"std": 0.04874463006854057,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.33344367146492004,
"max": 0.3858579397201538,
"mean": -0.00016948734992183745,
"std": 0.041480328887701035,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15724380314350128,
"max": 0.05914618447422981,
"mean": -0.03183374181389809,
"std": 0.0251409150660038,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6964119076728821,
"max": 0.4686836302280426,
"mean": -9.159947512671351e-05,
"std": 0.05179150402545929,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24826322495937347,
"max": 0.3285461962223053,
"mean": -0.00024742598179727793,
"std": 0.0414327010512352,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2872468829154968,
"max": 0.35023656487464905,
"mean": -2.1327541617210954e-06,
"std": 0.024238986894488335,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19656415283679962,
"max": 0.7792240381240845,
"mean": 0.6702939867973328,
"std": 0.05869279056787491,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22861742973327637,
"max": 0.2311892956495285,
"mean": -1.9813087419606745e-05,
"std": 0.04044097661972046,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.2196549028158188,
"max": 0.24067850410938263,
"mean": 0.0007784939371049404,
"std": 0.055799830704927444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21562136709690094,
"max": 0.22666974365711212,
"mean": -7.154869672376662e-05,
"std": 0.039377160370349884,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.90437126159668,
"max": 9.067243576049805,
"mean": -0.0012503080070018768,
"std": 1.8481035232543945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2693117558956146,
"max": 0.2589534521102905,
"mean": 4.357095167506486e-05,
"std": 0.038407646119594574,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05761706829071045,
"max": 0.05768207088112831,
"mean": 0.0003497683210298419,
"std": 0.01472416240721941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2650063633918762,
"max": 0.2886802554130554,
"mean": -6.175818271003664e-05,
"std": 0.039074014872312546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04376920685172081,
"max": 0.03731464967131615,
"mean": -8.56523183756508e-05,
"std": 0.013365812599658966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.33950644731521606,
"max": 1.0926629304885864,
"mean": 0.8637055158615112,
"std": 0.06385361403226852,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42327165603637695,
"max": 0.41919341683387756,
"mean": 0.00031273282365873456,
"std": 0.0435028038918972,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21477100253105164,
"max": 0.17062197625637054,
"mean": -0.02948208898305893,
"std": 0.0319497250020504,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5996779799461365,
"max": 0.5596659183502197,
"mean": -0.00015256987535394728,
"std": 0.05344602093100548,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17847254872322083,
"max": 0.37667688727378845,
"mean": 0.0013643621932715178,
"std": 0.03730973228812218,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3942721486091614,
"max": 0.36895284056663513,
"mean": 3.6433208151720464e-05,
"std": 0.028621351346373558,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2903022766113281,
"max": 0.826566219329834,
"mean": 0.7055737376213074,
"std": 0.06789274513721466,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9261494278907776,
"max": 1.0264488458633423,
"mean": -2.5618217478040606e-05,
"std": 0.04762551560997963,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8783111572265625,
"max": 0.81496262550354,
"mean": -0.0003140262851957232,
"std": 0.09553777426481247,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.269389808177948,
"max": 0.24095474183559418,
"mean": -2.2922709831618704e-05,
"std": 0.0389564111828804,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.739809036254883,
"max": 22.848268508911133,
"mean": -0.0918719619512558,
"std": 4.069859504699707,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22777613997459412,
"max": 0.24508334696292877,
"mean": -2.581250009825453e-05,
"std": 0.038639314472675323,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06041834130883217,
"max": 0.04605862498283386,
"mean": -0.00014601447037421167,
"std": 0.014698855578899384,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3384549617767334,
"max": 0.37450915575027466,
"mean": 7.243736035889015e-06,
"std": 0.04081535339355469,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.046464741230010986,
"max": 0.19570393860340118,
"mean": 0.0002726423554122448,
"std": 0.013569480739533901,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37450751662254333,
"max": 1.1300209760665894,
"mean": 0.8900179862976074,
"std": 0.06398562341928482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4477962851524353,
"max": 0.5424686074256897,
"mean": 2.4588229280197993e-05,
"std": 0.04556749016046524,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22407831251621246,
"max": 0.08827000111341476,
"mean": -0.032015662640333176,
"std": 0.03776349499821663,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7253148555755615,
"max": 0.6892704367637634,
"mean": 3.4532837162259966e-05,
"std": 0.051778074353933334,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.174549400806427,
"max": 0.21855904161930084,
"mean": 3.998563624918461e-05,
"std": 0.03177855163812637,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.34027042984962463,
"max": 0.37425076961517334,
"mean": 4.2934465454891324e-05,
"std": 0.03414500877261162,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31756624579429626,
"max": 1.2868921756744385,
"mean": 0.6014678478240967,
"std": 0.08346211910247803,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2833472490310669,
"max": 0.26022085547447205,
"mean": -3.076446546401712e-06,
"std": 0.03598489984869957,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.2355138659477234,
"max": 0.2053714245557785,
"mean": 0.0002318831393495202,
"std": 0.05601060390472412,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43542587757110596,
"max": 0.32521018385887146,
"mean": 2.451425461913459e-05,
"std": 0.034135881811380386,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.544894218444824,
"max": 7.312623977661133,
"mean": -0.007366415113210678,
"std": 0.6992328763008118,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34384819865226746,
"max": 0.3634955585002899,
"mean": 0.00010338952415622771,
"std": 0.047827959060668945,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07375156134366989,
"max": 0.06036222726106644,
"mean": 0.0009326444123871624,
"std": 0.014949453994631767,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.25554174184799194,
"max": 0.28655222058296204,
"mean": 4.425931365403812e-06,
"std": 0.04155518114566803,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05532686412334442,
"max": 0.06282556056976318,
"mean": 0.00014147879846859723,
"std": 0.0071739982813596725,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49367740750312805,
"max": 1.2208529710769653,
"mean": 1.0134257078170776,
"std": 0.11743961274623871,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0936145782470703,
"max": 1.0469423532485962,
"mean": -4.9777743697632104e-05,
"std": 0.05241077393293381,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.2236752212047577,
"max": 0.1727852076292038,
"mean": -0.027246128767728806,
"std": 0.03635065257549286,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8845650553703308,
"max": 0.9224934577941895,
"mean": -0.00014609616482630372,
"std": 0.05328214913606644,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17102202773094177,
"max": 0.3799096643924713,
"mean": 0.0033686563838273287,
"std": 0.039898186922073364,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7772517800331116,
"max": 0.7235067486763,
"mean": 1.9145372789353132e-05,
"std": 0.04616532474756241,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3385593295097351,
"max": 1.4277493953704834,
"mean": 0.948319673538208,
"std": 0.20673821866512299,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7455862760543823,
"max": 1.7045449018478394,
"mean": 0.00022695529332850128,
"std": 0.1586858183145523,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1996264457702637,
"max": 1.0995841026306152,
"mean": -0.009535307995975018,
"std": 0.20383313298225403,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.42129284143447876,
"max": 0.42636537551879883,
"mean": 6.450986256822944e-05,
"std": 0.04801839217543602,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.74388885498047,
"max": 19.53899383544922,
"mean": -0.24829958379268646,
"std": 4.776181221008301,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32387763261795044,
"max": 0.43839961290359497,
"mean": -1.2020052054140251e-05,
"std": 0.04616132006049156,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03405960276722908,
"max": 0.03712477907538414,
"mean": 0.000642063794657588,
"std": 0.012921381741762161,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7034934759140015,
"max": 0.6645202040672302,
"mean": 4.349739174358547e-05,
"std": 0.05788357928395271,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07222776859998703,
"max": 0.06750176101922989,
"mean": -0.00013276952086016536,
"std": 0.012919425964355469,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38018617033958435,
"max": 1.3909327983856201,
"mean": 1.0665558576583862,
"std": 0.21971333026885986,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.616411030292511,
"max": 0.71701979637146,
"mean": 0.00011130145139759406,
"std": 0.05802119895815849,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.2196073830127716,
"max": 0.22519457340240479,
"mean": 0.006242883857339621,
"std": 0.049728427082300186,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6296560168266296,
"max": 0.889208972454071,
"mean": 1.1700575669237878e-05,
"std": 0.023527782410383224,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5068318247795105,
"max": 0.47398847341537476,
"mean": -0.0030159649904817343,
"std": 0.06930278241634369,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377185344696045,
"max": 1.1807185411453247,
"mean": 0.782741904258728,
"std": 0.09885998070240021,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2669522166252136,
"max": 0.2126760631799698,
"mean": -0.00022303443984128535,
"std": 0.053996436297893524,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23790661990642548,
"max": 0.01483356487005949,
"mean": -0.043959345668554306,
"std": 0.03433229774236679,
"sparsity": 0.0,
"shape": [
100
]
}
}
}