zombieX3 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
7465f59 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.430247962474823,
"max": 0.29814788699150085,
"mean": -0.0025456156581640244,
"std": 0.042562179267406464,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06305033713579178,
"max": 0.10756707191467285,
"mean": 0.0006329622119665146,
"std": 0.03406817466020584,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4126828908920288,
"max": 0.8368642926216125,
"mean": -0.00020196933473926038,
"std": 0.024113450199365616,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11526867002248764,
"max": 0.3216077983379364,
"mean": -0.0009404964512214065,
"std": 0.019565371796488762,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7922351360321045,
"max": 2.8709537982940674,
"mean": -0.0003647372650448233,
"std": 0.6154845356941223,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27921348810195923,
"max": 0.38164129853248596,
"mean": 0.0004232236242387444,
"std": 0.04274886101484299,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.2224942147731781,
"max": 0.20972047746181488,
"mean": -0.004487486090511084,
"std": 0.040916070342063904,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4284340739250183,
"max": 0.47617435455322266,
"mean": 3.322187239973573e-06,
"std": 0.024511422961950302,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32528114318847656,
"max": 0.15677402913570404,
"mean": -0.04670446366071701,
"std": 0.051589105278253555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41054657101631165,
"max": 0.3546879291534424,
"mean": -0.00012705953849945217,
"std": 0.023604456335306168,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22982755303382874,
"max": 0.26271378993988037,
"mean": -0.029137738049030304,
"std": 0.049353621900081635,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25457319617271423,
"max": 0.8201438188552856,
"mean": 0.5254908800125122,
"std": 0.08082503080368042,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29710477590560913,
"max": 0.26579147577285767,
"mean": -0.0004257034743204713,
"std": 0.03210267424583435,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09286229312419891,
"max": 0.12479868531227112,
"mean": 0.0006487525533884764,
"std": 0.025735046714544296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.290811687707901,
"max": 0.2813718020915985,
"mean": -7.56493245717138e-05,
"std": 0.030931707471609116,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.900395393371582,
"max": 5.815171718597412,
"mean": -0.009333105757832527,
"std": 1.295695185661316,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4251435399055481,
"max": 0.3437366187572479,
"mean": 9.79713149718009e-05,
"std": 0.02995358221232891,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028972996398806572,
"max": 0.027724435552954674,
"mean": -0.00031865754863247275,
"std": 0.012574296444654465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45405057072639465,
"max": 0.44834038615226746,
"mean": 2.372298331465572e-05,
"std": 0.02385387383401394,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08870794624090195,
"max": 0.09110292047262192,
"mean": 0.0022859524469822645,
"std": 0.01951485686004162,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26681551337242126,
"max": 1.056317687034607,
"mean": 0.5312033891677856,
"std": 0.10443911701440811,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5745526552200317,
"max": 0.6082873940467834,
"mean": -0.00043126955279149115,
"std": 0.03860025480389595,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18273141980171204,
"max": 0.04556818678975105,
"mean": -0.029461650177836418,
"std": 0.042611170560121536,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1671894788742065,
"max": 1.6339271068572998,
"mean": 0.0003239789803046733,
"std": 0.027696946635842323,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16238771378993988,
"max": 0.20571960508823395,
"mean": -0.021131085231900215,
"std": 0.02794588916003704,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22399598360061646,
"max": 0.8438678979873657,
"mean": 0.48765647411346436,
"std": 0.07522650808095932,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2555526793003082,
"max": 0.305812269449234,
"mean": -6.7934306571260095e-06,
"std": 0.03347478806972504,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09538023918867111,
"max": 0.11050069332122803,
"mean": 6.53832103125751e-05,
"std": 0.02696637623012066,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.297147661447525,
"max": 0.2961280345916748,
"mean": 5.286935265758075e-05,
"std": 0.032545968890190125,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.165225028991699,
"max": 5.085448741912842,
"mean": -0.014597500674426556,
"std": 1.1575955152511597,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3449052572250366,
"max": 0.34331217408180237,
"mean": 7.911311695352197e-05,
"std": 0.03006201609969139,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03610210865736008,
"max": 0.03328812122344971,
"mean": -0.0001417656458215788,
"std": 0.01303204894065857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3154510259628296,
"max": 0.37501609325408936,
"mean": -2.077353019558359e-05,
"std": 0.024059347808361053,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10547598451375961,
"max": 0.1221047043800354,
"mean": -0.0019677607342600822,
"std": 0.028854791074991226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31151488423347473,
"max": 1.1208997964859009,
"mean": 0.6663015484809875,
"std": 0.09774678200483322,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8727833032608032,
"max": 0.6275414824485779,
"mean": 0.001675266888923943,
"std": 0.04743880406022072,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2714674770832062,
"max": 0.03427550569176674,
"mean": -0.04661353677511215,
"std": 0.040598493069410324,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9226045608520508,
"max": 0.9647504687309265,
"mean": 0.0010200842516496778,
"std": 0.040706485509872437,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.1445719450712204,
"max": 0.07502147555351257,
"mean": -0.009089105762541294,
"std": 0.025694996118545532,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.24015086889266968,
"max": 0.7130303978919983,
"mean": 0.4472612142562866,
"std": 0.05932846665382385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27250105142593384,
"max": 0.29779112339019775,
"mean": 9.235942343366332e-06,
"std": 0.03546915203332901,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.1193777546286583,
"max": 0.11857955157756805,
"mean": 0.0007589810993522406,
"std": 0.02763049118220806,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28105634450912476,
"max": 0.2798849046230316,
"mean": -7.697378896409646e-05,
"std": 0.0350995697081089,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5100622177124023,
"max": 2.5220582485198975,
"mean": 0.02675231173634529,
"std": 0.5868890285491943,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2211104929447174,
"max": 0.27162447571754456,
"mean": 2.60172691923799e-06,
"std": 0.030733274295926094,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.033548399806022644,
"max": 0.03133385255932808,
"mean": 0.00011904191342182457,
"std": 0.012407796457409859,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23527584969997406,
"max": 0.23167696595191956,
"mean": 5.708727621822618e-05,
"std": 0.025696981698274612,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13586905598640442,
"max": 0.12758414447307587,
"mean": -0.0054936036467552185,
"std": 0.039962876588106155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35451188683509827,
"max": 1.1720999479293823,
"mean": 0.710637629032135,
"std": 0.10376914590597153,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6174948811531067,
"max": 0.5544577240943909,
"mean": 0.0011600415455177426,
"std": 0.04611966758966446,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.1883939653635025,
"max": 0.02492486871778965,
"mean": -0.03484141081571579,
"std": 0.028610829263925552,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.131612777709961,
"max": 0.9714275002479553,
"mean": 0.00035819801269099116,
"std": 0.04234758019447327,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5980822443962097,
"max": 0.06284141540527344,
"mean": -0.004877430386841297,
"std": 0.028617603704333305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37526264786720276,
"max": 0.9405426383018494,
"mean": 0.5925549268722534,
"std": 0.0669507160782814,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.39145711064338684,
"max": 0.3691279888153076,
"mean": 7.120549707906321e-05,
"std": 0.03718876466155052,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11895960569381714,
"max": 0.13652607798576355,
"mean": 0.0009289687732234597,
"std": 0.029236802831292152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.619219183921814,
"max": 0.5088949203491211,
"mean": 1.4944693248253316e-05,
"std": 0.036442093551158905,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.188663482666016,
"max": 8.790773391723633,
"mean": -0.10929473489522934,
"std": 1.6991605758666992,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2766683101654053,
"max": 0.23983481526374817,
"mean": 5.299611802911386e-05,
"std": 0.032615721225738525,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.052095651626586914,
"max": 0.039515361189842224,
"mean": 9.424134623259306e-05,
"std": 0.012960628606379032,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23076868057250977,
"max": 0.234751895070076,
"mean": -2.1736430426244624e-05,
"std": 0.029392007738351822,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20435833930969238,
"max": 0.10555171221494675,
"mean": -0.004022371023893356,
"std": 0.03262435272336006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.33977094292640686,
"max": 1.0126755237579346,
"mean": 0.7008676528930664,
"std": 0.0967569425702095,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5649488568305969,
"max": 0.8331477046012878,
"mean": 0.00041524306288920343,
"std": 0.04230210557579994,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21171551942825317,
"max": 0.030433084815740585,
"mean": -0.03218771517276764,
"std": 0.026509009301662445,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7544965744018555,
"max": 0.7186921834945679,
"mean": -1.2556927686091512e-05,
"std": 0.036842044442892075,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26356518268585205,
"max": 0.10585562884807587,
"mean": -0.003026221413165331,
"std": 0.028868772089481354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28427132964134216,
"max": 0.6951562762260437,
"mean": 0.4995492994785309,
"std": 0.046537742018699646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27920955419540405,
"max": 0.23424308001995087,
"mean": -0.00011120487761218101,
"std": 0.038762450218200684,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15435229241847992,
"max": 0.126743882894516,
"mean": -0.002232551807537675,
"std": 0.03338867425918579,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41404443979263306,
"max": 0.6600516438484192,
"mean": -1.9756593246711418e-05,
"std": 0.03909948095679283,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.238841533660889,
"max": 4.723404884338379,
"mean": -0.02046278491616249,
"std": 1.0078744888305664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24500444531440735,
"max": 0.20759114623069763,
"mean": 4.401802652864717e-05,
"std": 0.03396647423505783,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03457580879330635,
"max": 0.04486193135380745,
"mean": -1.914246240630746e-05,
"std": 0.012628658674657345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20080946385860443,
"max": 0.20593363046646118,
"mean": -2.9703282052651048e-05,
"std": 0.03102399967610836,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.20000168681144714,
"max": 0.11336001008749008,
"mean": -0.002912652213126421,
"std": 0.03451835736632347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3670476973056793,
"max": 1.0570876598358154,
"mean": 0.6706215143203735,
"std": 0.06639451533555984,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39835721254348755,
"max": 0.5023353695869446,
"mean": -3.849938002531417e-05,
"std": 0.0411369614303112,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12806333601474762,
"max": 0.026793837547302246,
"mean": -0.030542662367224693,
"std": 0.021876059472560883,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4490928053855896,
"max": 0.4329548478126526,
"mean": 7.997997454367578e-05,
"std": 0.03489622473716736,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2676912248134613,
"max": 0.07277432084083557,
"mean": -0.0011054163333028555,
"std": 0.023129144683480263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28743863105773926,
"max": 0.6852545738220215,
"mean": 0.5245908498764038,
"std": 0.047539178282022476,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22235621511936188,
"max": 0.2234710454940796,
"mean": 1.5755222193547525e-05,
"std": 0.03895283117890358,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13644249737262726,
"max": 0.10925862938165665,
"mean": 0.00023633803357370198,
"std": 0.029229167848825455,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.3750911056995392,
"max": 0.4374293088912964,
"mean": -9.469786164117977e-06,
"std": 0.03928925842046738,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8464367389678955,
"max": 5.000250816345215,
"mean": 0.009745623916387558,
"std": 0.8453732132911682,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22324559092521667,
"max": 0.22006931900978088,
"mean": -2.64663412963273e-07,
"std": 0.03441375494003296,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04371564835309982,
"max": 0.03597109019756317,
"mean": -0.0002580236759968102,
"std": 0.012081029824912548,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21329627931118011,
"max": 0.1888744831085205,
"mean": -1.6700443666195497e-05,
"std": 0.03154045715928078,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.1808258593082428,
"max": 0.12078980356454849,
"mean": -0.002406290266662836,
"std": 0.04127614200115204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.42247915267944336,
"max": 0.9420861601829529,
"mean": 0.6627910733222961,
"std": 0.0568135567009449,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3714267611503601,
"max": 0.47587329149246216,
"mean": -8.246101788245142e-05,
"std": 0.04089611768722534,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.208319753408432,
"max": 0.02722310833632946,
"mean": -0.03024582751095295,
"std": 0.021349623799324036,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.34010598063468933,
"max": 0.7335456013679504,
"mean": 8.291324775200337e-05,
"std": 0.03477157652378082,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2402523010969162,
"max": 0.050502024590969086,
"mean": -0.0011936500668525696,
"std": 0.020464643836021423,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3060189485549927,
"max": 0.6537417769432068,
"mean": 0.5251810550689697,
"std": 0.046129435300827026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3043527901172638,
"max": 0.2173452079296112,
"mean": 6.987799861235544e-05,
"std": 0.03949924185872078,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.1495305597782135,
"max": 0.13139042258262634,
"mean": 0.0003452928503975272,
"std": 0.03046758659183979,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25741448998451233,
"max": 0.2021329253911972,
"mean": 3.105932046310045e-05,
"std": 0.039488501846790314,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.336733102798462,
"max": 2.376356840133667,
"mean": -0.026247980073094368,
"std": 0.44985267519950867,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18904413282871246,
"max": 0.2104651778936386,
"mean": 3.720704626175575e-05,
"std": 0.03479856252670288,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03166992589831352,
"max": 0.035564228892326355,
"mean": -0.00020107123418711126,
"std": 0.012294227257370949,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18845464289188385,
"max": 0.17046742141246796,
"mean": -6.800049595767632e-05,
"std": 0.03217524290084839,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13940171897411346,
"max": 0.13724905252456665,
"mean": -0.002515769563615322,
"std": 0.05131084844470024,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4671289920806885,
"max": 0.9564934968948364,
"mean": 0.6689913272857666,
"std": 0.05279172211885452,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3243524730205536,
"max": 0.30971962213516235,
"mean": -1.389088538417127e-06,
"std": 0.04095206782221794,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12475074827671051,
"max": 0.02534548193216324,
"mean": -0.03070956841111183,
"std": 0.019817529246211052,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.44013386964797974,
"max": 0.44524946808815,
"mean": 9.531535761198029e-05,
"std": 0.03512435778975487,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22465433180332184,
"max": 0.05168891325592995,
"mean": -0.0011842836393043399,
"std": 0.018476232886314392,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3392145037651062,
"max": 0.739431619644165,
"mean": 0.5587528944015503,
"std": 0.04140577092766762,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2725517153739929,
"max": 0.2784435749053955,
"mean": 1.987360155908391e-05,
"std": 0.04106256738305092,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13695892691612244,
"max": 0.13984902203083038,
"mean": 0.00048777679330669343,
"std": 0.026632118970155716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4907291829586029,
"max": 0.35599952936172485,
"mean": 8.879909000825137e-05,
"std": 0.0407005213201046,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2975404262542725,
"max": 1.7454535961151123,
"mean": -0.02108157053589821,
"std": 0.5002167820930481,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2176651507616043,
"max": 0.19791799783706665,
"mean": -4.056983016198501e-05,
"std": 0.03423743695020676,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04131868854165077,
"max": 0.038581475615501404,
"mean": -0.00014208082575351,
"std": 0.012879491783678532,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17750245332717896,
"max": 0.18368542194366455,
"mean": 4.755006739287637e-05,
"std": 0.031560346484184265,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17995940148830414,
"max": 0.18388336896896362,
"mean": -0.0022164953406900167,
"std": 0.05484570935368538,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742797613143921,
"max": 1.0257062911987305,
"mean": 0.6453534960746765,
"std": 0.05035950988531113,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27185168862342834,
"max": 0.3093569278717041,
"mean": 0.00011239617015235126,
"std": 0.04068810120224953,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10582997649908066,
"max": 0.02683391235768795,
"mean": -0.029520545154809952,
"std": 0.01793094538152218,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3390536606311798,
"max": 0.32923397421836853,
"mean": 5.560236604651436e-05,
"std": 0.03441813215613365,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.181716188788414,
"max": 0.04217486456036568,
"mean": -0.0010700200218707323,
"std": 0.017213836312294006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32544824481010437,
"max": 0.6866950988769531,
"mean": 0.511271595954895,
"std": 0.036954350769519806,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23384520411491394,
"max": 0.22571122646331787,
"mean": -3.601049320423044e-05,
"std": 0.0391816720366478,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.1153523325920105,
"max": 0.1316574662923813,
"mean": 0.000150712497998029,
"std": 0.029186168685555458,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.35289716720581055,
"max": 0.285473108291626,
"mean": 7.233719770738389e-06,
"std": 0.03925013542175293,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.133274078369141,
"max": 3.544353723526001,
"mean": -0.011593173258006573,
"std": 0.6827409267425537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21133771538734436,
"max": 0.20911119878292084,
"mean": 3.477419522823766e-05,
"std": 0.034489333629608154,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03563081845641136,
"max": 0.04807223752140999,
"mean": 0.0007964536780491471,
"std": 0.012856329791247845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21064519882202148,
"max": 0.19317731261253357,
"mean": -1.2986236015422037e-06,
"std": 0.03169986233115196,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.1866597682237625,
"max": 0.17717307806015015,
"mean": -0.002846275921911001,
"std": 0.05864023044705391,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47464174032211304,
"max": 1.0418421030044556,
"mean": 0.6514742970466614,
"std": 0.049661051481962204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.2484884411096573,
"max": 0.3291080594062805,
"mean": 0.00018062048184219748,
"std": 0.040576666593551636,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12466001510620117,
"max": 0.024652821943163872,
"mean": -0.030505184084177017,
"std": 0.01760147698223591,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42117249965667725,
"max": 0.48183169960975647,
"mean": 4.90086677018553e-07,
"std": 0.03540300950407982,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15187376737594604,
"max": 0.04340476170182228,
"mean": 4.305229231249541e-05,
"std": 0.014882412739098072,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31561803817749023,
"max": 0.6820628046989441,
"mean": 0.5529670715332031,
"std": 0.04071620851755142,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20640292763710022,
"max": 0.2199181616306305,
"mean": 3.100156754953787e-05,
"std": 0.03830336779356003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13785934448242188,
"max": 0.11272227019071579,
"mean": 2.0263127225916833e-05,
"std": 0.02582014910876751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.4027767777442932,
"max": 0.37112095952033997,
"mean": 2.6220748623018153e-05,
"std": 0.038185179233551025,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7714638710021973,
"max": 2.8691656589508057,
"mean": 0.0011573480442166328,
"std": 0.5169197916984558,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20294718444347382,
"max": 0.1975032389163971,
"mean": 2.9508448278647847e-05,
"std": 0.03430049493908882,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050956204533576965,
"max": 0.04001324996352196,
"mean": -0.0004197848029434681,
"std": 0.013423827476799488,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.1965385526418686,
"max": 0.20179617404937744,
"mean": -1.230049292644253e-05,
"std": 0.03180824965238571,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.1932075023651123,
"max": 0.19514988362789154,
"mean": -0.002968719694763422,
"std": 0.06257235258817673,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3494449555873871,
"max": 1.084139108657837,
"mean": 0.6672452688217163,
"std": 0.055235255509614944,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22517867386341095,
"max": 0.2515127956867218,
"mean": 0.0003590761625673622,
"std": 0.04076584428548813,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09105702489614487,
"max": 0.043770160526037216,
"mean": -0.030091021209955215,
"std": 0.0176088884472847,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.3535248339176178,
"max": 0.30410754680633545,
"mean": -4.392282062326558e-05,
"std": 0.03712813928723335,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16202455759048462,
"max": 0.06354078650474548,
"mean": -8.128902118187398e-05,
"std": 0.01940615102648735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34876754879951477,
"max": 0.7220309376716614,
"mean": 0.5424379706382751,
"std": 0.039069268852472305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.2193686068058014,
"max": 0.22314214706420898,
"mean": -1.1116904715890996e-05,
"std": 0.03923606500029564,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11840695887804031,
"max": 0.1707676649093628,
"mean": 0.00028346438193693757,
"std": 0.025122247636318207,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24684838950634003,
"max": 0.3010847866535187,
"mean": -3.651722363429144e-05,
"std": 0.038935575634241104,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5055902004241943,
"max": 3.715036153793335,
"mean": 0.01585192233324051,
"std": 0.7825286984443665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21871182322502136,
"max": 0.2376304566860199,
"mean": -1.361081376671791e-05,
"std": 0.03630790859460831,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04719124361872673,
"max": 0.05140624940395355,
"mean": 0.00048010991304181516,
"std": 0.013516944833099842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21404245495796204,
"max": 0.21762129664421082,
"mean": 5.64762121939566e-05,
"std": 0.03361983224749565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.2114625871181488,
"max": 0.231521874666214,
"mean": -0.005106819327920675,
"std": 0.06188430264592171,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36219048500061035,
"max": 1.1013058423995972,
"mean": 0.6993670463562012,
"std": 0.053603965789079666,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23459365963935852,
"max": 0.2449057400226593,
"mean": 0.00046347593888640404,
"std": 0.04127476364374161,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09808015823364258,
"max": 0.06838114559650421,
"mean": -0.03143930807709694,
"std": 0.01812371425330639,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30170318484306335,
"max": 0.3515554368495941,
"mean": -8.153638191288337e-05,
"std": 0.040280573070049286,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15233194828033447,
"max": 0.14967864751815796,
"mean": 0.00025540069327689707,
"std": 0.023036718368530273,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.99940425157547,
"max": 1.0017729997634888,
"mean": 1.0002546310424805,
"std": 0.0006659556529484689,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03126639127731323,
"max": 0.03126263990998268,
"mean": -1.9294351659482345e-05,
"std": 0.018044061958789825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031232889741659164,
"max": 0.03099249303340912,
"mean": -0.001084338640794158,
"std": 0.017953665927052498,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031263306736946106,
"max": 0.031267084181308746,
"mean": 3.548895620042458e-06,
"std": 0.018044468015432358,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.03115880861878395,
"max": 0.031179169192910194,
"mean": 0.0003339822869747877,
"std": 0.018065886572003365,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.00013742789451498538,
"max": 0.00015863632143009454,
"mean": 2.736554449711548e-07,
"std": 4.781073585036211e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9996252655982971,
"max": 1.0021158456802368,
"mean": 1.0004429817199707,
"std": 0.0006555348518304527,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03161333501338959,
"max": 0.031580716371536255,
"mean": -9.014614079205785e-06,
"std": 0.018046868965029716,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.031167982146143913,
"max": 0.03145414963364601,
"mean": 0.0002899511018767953,
"std": 0.01800374686717987,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.00018904745229519904,
"max": 0.00019723534933291376,
"mean": 1.0521711502065045e-08,
"std": 3.849043423542753e-05,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.00014144052693154663,
"max": 0.00015886471373960376,
"mean": 2.7657870305120014e-07,
"std": 4.894055746262893e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.38299599289894104,
"max": 0.7195751070976257,
"mean": 0.5807684659957886,
"std": 0.03886786475777626,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23805734515190125,
"max": 0.19658388197422028,
"mean": 2.6588520995574072e-05,
"std": 0.037470221519470215,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11865263432264328,
"max": 0.16607660055160522,
"mean": 0.0009905615588650107,
"std": 0.027556024491786957,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24617764353752136,
"max": 0.5007338523864746,
"mean": -5.0468875997466967e-05,
"std": 0.03762808069586754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9424328804016113,
"max": 3.7695746421813965,
"mean": -0.003572134766727686,
"std": 0.681464433670044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22736115753650665,
"max": 0.2514519989490509,
"mean": -1.1535179510246962e-05,
"std": 0.037439387291669846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07172132283449173,
"max": 0.08075973391532898,
"mean": -0.0005193240358494222,
"std": 0.0156661756336689,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.2282123565673828,
"max": 0.25804591178894043,
"mean": -2.8565638785948977e-05,
"std": 0.03542618080973625,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20044255256652832,
"max": 0.21519678831100464,
"mean": -0.005535616539418697,
"std": 0.06834741681814194,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.40515244007110596,
"max": 1.1894633769989014,
"mean": 0.7380411624908447,
"std": 0.055237166583538055,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.221146821975708,
"max": 0.24604949355125427,
"mean": 0.0005211484967730939,
"std": 0.041342463344335556,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10338832437992096,
"max": 0.02417122572660446,
"mean": -0.03267121687531471,
"std": 0.018886109814047813,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4494054913520813,
"max": 0.4224247634410858,
"mean": -0.0004330066149123013,
"std": 0.046903740614652634,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2513982057571411,
"max": 0.47010472416877747,
"mean": 0.003200565231963992,
"std": 0.04454652965068817,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3171570301055908,
"max": 0.33336329460144043,
"mean": -2.526402022340335e-05,
"std": 0.021290859207510948,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3245790898799896,
"max": 0.6854778528213501,
"mean": 0.5710608959197998,
"std": 0.04472013935446739,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16466441750526428,
"max": 0.1739748865365982,
"mean": -4.8596641136100516e-05,
"std": 0.03318468853831291,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18683482706546783,
"max": 0.14287494122982025,
"mean": 3.6249548429623246e-05,
"std": 0.029692435637116432,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.38059577345848083,
"max": 0.24607740342617035,
"mean": -9.968647646019235e-06,
"std": 0.03276587277650833,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.65606689453125,
"max": 3.290353775024414,
"mean": -0.01425391435623169,
"std": 0.9852582812309265,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23509447276592255,
"max": 0.24749873578548431,
"mean": -1.7839809515862726e-05,
"std": 0.04170282557606697,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07275734841823578,
"max": 0.15453355014324188,
"mean": 0.0006638452177867293,
"std": 0.025170044973492622,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.26656097173690796,
"max": 0.24857115745544434,
"mean": -1.5359542885562405e-05,
"std": 0.040143173187971115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18948662281036377,
"max": 0.19466565549373627,
"mean": -0.0012274996843189,
"std": 0.06669430434703827,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.3292614817619324,
"max": 0.9995094537734985,
"mean": 0.7192604541778564,
"std": 0.05234057828783989,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.2315857857465744,
"max": 0.24574460089206696,
"mean": 0.00018271194130647928,
"std": 0.04090625420212746,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11421883851289749,
"max": 0.018689358606934547,
"mean": -0.04248232766985893,
"std": 0.018854642286896706,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.38993996381759644,
"max": 0.4073200523853302,
"mean": -2.1967953216517344e-05,
"std": 0.04854067787528038,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6932199001312256,
"max": 0.4125868082046509,
"mean": 0.0008555519161745906,
"std": 0.06029324233531952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0002173546963604167,
"max": 1.0001165866851807,
"mean": 0.0004882887005805969,
"std": 0.0220916960388422,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9994292855262756,
"max": 1.0017839670181274,
"mean": 1.000253677368164,
"std": 0.000652652932330966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03126111254096031,
"max": 0.0312650129199028,
"mean": -2.1023370209150016e-05,
"std": 0.0180354006588459,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031219881027936935,
"max": 0.031236713752150536,
"mean": -0.0006771213375031948,
"std": 0.017829909920692444,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03126417100429535,
"max": 0.03126959502696991,
"mean": -8.83279244590085e-06,
"std": 0.018034426495432854,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.03123662993311882,
"max": 0.03124932385981083,
"mean": -0.0007298794225789607,
"std": 0.01794484816491604,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00017386232502758503,
"max": 0.00014760847261641175,
"mean": 3.442557272137492e-06,
"std": 5.325600432115607e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9995221495628357,
"max": 1.0020443201065063,
"mean": 1.0004539489746094,
"std": 0.000669351196847856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03147042542695999,
"max": 0.03158598765730858,
"mean": 5.1154065658920445e-06,
"std": 0.018045036122202873,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03117763064801693,
"max": 0.031405530869960785,
"mean": 0.00032266404014080763,
"std": 0.0180798526853323,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.00019398781296331435,
"max": 0.0002045449218712747,
"mean": 1.7092556845454965e-06,
"std": 3.9782767998985946e-05,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00017830374417826533,
"max": 0.0001471550203859806,
"mean": 3.7268218875396997e-06,
"std": 5.360128852771595e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23455342650413513,
"max": 0.27251818776130676,
"mean": 7.011342859186698e-06,
"std": 0.018812235444784164,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.3213299512863159,
"max": 0.6936513781547546,
"mean": 0.5816924571990967,
"std": 0.045936986804008484,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18183718621730804,
"max": 0.19770397245883942,
"mean": -1.1711626939359121e-05,
"std": 0.033187560737133026,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16069863736629486,
"max": 0.12950360774993896,
"mean": -0.001068056095391512,
"std": 0.03414401412010193,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33220145106315613,
"max": 0.31142792105674744,
"mean": -1.0354739060858265e-05,
"std": 0.03223816305398941,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.803721904754639,
"max": 8.76359748840332,
"mean": 0.09347197413444519,
"std": 1.6197658777236938,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23378030955791473,
"max": 0.24203070998191833,
"mean": 4.133610491408035e-05,
"std": 0.0408620685338974,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07593037933111191,
"max": 0.06580135226249695,
"mean": 0.0004787116195075214,
"std": 0.019414879381656647,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24592415988445282,
"max": 0.2340637594461441,
"mean": -2.9871353035559878e-06,
"std": 0.03943677991628647,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.1628992110490799,
"max": 0.16083794832229614,
"mean": 0.001633270876482129,
"std": 0.06527844816446304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5569714307785034,
"max": 0.9439458250999451,
"mean": 0.7129694819450378,
"std": 0.04013355076313019,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.2286839783191681,
"max": 0.2551024854183197,
"mean": -4.545085539575666e-05,
"std": 0.04058132320642471,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13476935029029846,
"max": 0.02225329726934433,
"mean": -0.04135678708553314,
"std": 0.018384402617812157,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42168760299682617,
"max": 0.39237409830093384,
"mean": -4.401172191137448e-06,
"std": 0.04779110476374626,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6073517799377441,
"max": 0.6513891220092773,
"mean": 0.0015880158171057701,
"std": 0.05683854594826698,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2518226206302643,
"max": 0.3207785189151764,
"mean": -6.094680884416448e-06,
"std": 0.019615668803453445,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3598737120628357,
"max": 0.6824128031730652,
"mean": 0.5707628726959229,
"std": 0.0429723858833313,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22058245539665222,
"max": 0.1771002560853958,
"mean": -3.480628220131621e-05,
"std": 0.0343024767935276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16346584260463715,
"max": 0.23297329246997833,
"mean": 0.000366326654329896,
"std": 0.03285832703113556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2638060748577118,
"max": 0.23985332250595093,
"mean": -5.253252311376855e-05,
"std": 0.033901575952768326,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.8552327156066895,
"max": 5.091460227966309,
"mean": 0.04388260096311569,
"std": 1.2293205261230469,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24656128883361816,
"max": 0.2505475580692291,
"mean": 7.217615348054096e-05,
"std": 0.043992768973112106,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.0626230239868164,
"max": 0.054548561573028564,
"mean": 0.0006508217193186283,
"std": 0.017192188650369644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2865524888038635,
"max": 0.2719300389289856,
"mean": -4.991707464796491e-05,
"std": 0.04299106448888779,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.1607704609632492,
"max": 0.17038598656654358,
"mean": -0.0028860813472419977,
"std": 0.05928485840559006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5196844339370728,
"max": 0.9328820705413818,
"mean": 0.7135865688323975,
"std": 0.03841733559966087,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23817408084869385,
"max": 0.2493610382080078,
"mean": 0.00046480720629915595,
"std": 0.04046126455068588,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14443805813789368,
"max": 0.04147465527057648,
"mean": -0.03969287499785423,
"std": 0.020544789731502533,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5328277945518494,
"max": 0.5829682350158691,
"mean": 6.036185368429869e-06,
"std": 0.048868391662836075,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5192180871963501,
"max": 0.49342840909957886,
"mean": 0.0023608917836099863,
"std": 0.05344958230853081,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2736181318759918,
"max": 0.31526556611061096,
"mean": 1.8652735889190808e-06,
"std": 0.020052799955010414,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36623507738113403,
"max": 0.7115861177444458,
"mean": 0.5932326316833496,
"std": 0.045942164957523346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21099260449409485,
"max": 0.19959695637226105,
"mean": 3.07829977828078e-05,
"std": 0.034868910908699036,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18723583221435547,
"max": 0.20388372242450714,
"mean": 0.000956192088779062,
"std": 0.031518690288066864,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28975075483322144,
"max": 0.3398789167404175,
"mean": -4.732892557512969e-05,
"std": 0.034589968621730804,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.877439260482788,
"max": 3.3875346183776855,
"mean": 0.014458952471613884,
"std": 0.858471155166626,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22435642778873444,
"max": 0.249828040599823,
"mean": -4.0124336919689085e-06,
"std": 0.04223557561635971,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05512487143278122,
"max": 0.046701643615961075,
"mean": -1.9162820535711944e-05,
"std": 0.015846921131014824,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29301708936691284,
"max": 0.29095572233200073,
"mean": -7.334054771490628e-06,
"std": 0.04195055365562439,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12482845038175583,
"max": 0.25941941142082214,
"mean": -0.003237831173464656,
"std": 0.05315971001982689,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4561736285686493,
"max": 0.8445789813995361,
"mean": 0.7056531310081482,
"std": 0.035228051245212555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5114014148712158,
"max": 0.348456472158432,
"mean": 0.00034256701474078,
"std": 0.04020610451698303,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18698255717754364,
"max": 0.03949001431465149,
"mean": -0.03939007595181465,
"std": 0.0213507991284132,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.544358491897583,
"max": 0.5564395785331726,
"mean": -7.145745621528476e-05,
"std": 0.05074309930205345,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5117879509925842,
"max": 0.6644083857536316,
"mean": 0.002445152960717678,
"std": 0.04953145608305931,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33249062299728394,
"max": 0.2656247019767761,
"mean": 3.6327573980088346e-06,
"std": 0.019390461966395378,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3221387565135956,
"max": 0.7663495540618896,
"mean": 0.651084840297699,
"std": 0.04530828446149826,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24955259263515472,
"max": 0.21952223777770996,
"mean": -2.4627406673971564e-06,
"std": 0.0365021638572216,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32713782787323,
"max": 0.2872367203235626,
"mean": -0.0006778471870347857,
"std": 0.03855384141206741,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.31010347604751587,
"max": 0.36993831396102905,
"mean": 6.482718890765682e-05,
"std": 0.036242760717868805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.71769905090332,
"max": 5.807940483093262,
"mean": 0.03795948997139931,
"std": 1.4132622480392456,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.2217160314321518,
"max": 0.20588469505310059,
"mean": -7.503203232772648e-05,
"std": 0.04249139502644539,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07754088938236237,
"max": 0.051487792283296585,
"mean": -0.0009253682801499963,
"std": 0.016408486291766167,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3308248519897461,
"max": 0.32916712760925293,
"mean": -4.993749826098792e-06,
"std": 0.042798057198524475,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2850324213504791,
"max": 0.1117776408791542,
"mean": -0.0012074043042957783,
"std": 0.047010280191898346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4863123297691345,
"max": 0.8869433403015137,
"mean": 0.7375507354736328,
"std": 0.03823651745915413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.36125612258911133,
"max": 0.27433156967163086,
"mean": 5.119972047396004e-05,
"std": 0.04065272584557533,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2477303296327591,
"max": 0.04647788032889366,
"mean": -0.03926857188344002,
"std": 0.023257533088326454,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6263415217399597,
"max": 0.5970607399940491,
"mean": -6.0351769207045436e-05,
"std": 0.05312627553939819,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.709812343120575,
"max": 0.2658604085445404,
"mean": 0.0009171634446829557,
"std": 0.051236364990472794,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433721363544464,
"max": 0.30349576473236084,
"mean": 1.867878154371283e-07,
"std": 0.019139809533953667,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34990525245666504,
"max": 0.7829033136367798,
"mean": 0.6388983726501465,
"std": 0.04923005402088165,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20573130249977112,
"max": 0.2069031298160553,
"mean": -5.999910717946477e-05,
"std": 0.037698354572057724,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25860944390296936,
"max": 0.2683144211769104,
"mean": -0.00040654174517840147,
"std": 0.04462500661611557,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3541562557220459,
"max": 0.3225262761116028,
"mean": -7.357165486610029e-06,
"std": 0.03720669820904732,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.261901378631592,
"max": 4.204929351806641,
"mean": -0.026422729715704918,
"std": 1.0068349838256836,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23875762522220612,
"max": 0.24374397099018097,
"mean": -2.557489278842695e-05,
"std": 0.04321581870317459,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06234561279416084,
"max": 0.05673680081963539,
"mean": 0.00034723637509159744,
"std": 0.01415068656206131,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4374503195285797,
"max": 0.37361523509025574,
"mean": 1.4507659216178581e-05,
"std": 0.044127773493528366,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09634225070476532,
"max": 0.17621064186096191,
"mean": -0.0006586947711184621,
"std": 0.035146258771419525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.421725332736969,
"max": 1.0694254636764526,
"mean": 0.7485451698303223,
"std": 0.04206714406609535,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2659734785556793,
"max": 0.2969002425670624,
"mean": -7.885815284680575e-05,
"std": 0.04081321880221367,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18494504690170288,
"max": 0.043268244713544846,
"mean": -0.03681334853172302,
"std": 0.025581398978829384,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4577294886112213,
"max": 0.4868638217449188,
"mean": 4.411918780533597e-05,
"std": 0.054221056401729584,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.286346971988678,
"max": 0.5518361330032349,
"mean": -0.0008815097389742732,
"std": 0.04783621430397034,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29267972707748413,
"max": 0.3227570652961731,
"mean": 6.020641194481868e-06,
"std": 0.019972950220108032,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2912514805793762,
"max": 0.7601991891860962,
"mean": 0.6508588194847107,
"std": 0.05212089791893959,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.2437000423669815,
"max": 0.26162612438201904,
"mean": -5.554972631216515e-06,
"std": 0.039614368230104446,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2675025463104248,
"max": 0.20013028383255005,
"mean": -0.0008774266461841762,
"std": 0.05176888778805733,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27221566438674927,
"max": 0.25374382734298706,
"mean": 5.006398168916348e-06,
"std": 0.03871097415685654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.966026306152344,
"max": 15.947824478149414,
"mean": 0.03323008120059967,
"std": 1.989342451095581,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20656642317771912,
"max": 0.22588562965393066,
"mean": -7.24760175216943e-05,
"std": 0.040559086948633194,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06937043368816376,
"max": 0.06317680329084396,
"mean": 0.000156470196088776,
"std": 0.014745255932211876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46550098061561584,
"max": 0.32025203108787537,
"mean": 1.966371200978756e-05,
"std": 0.04059458151459694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06405901163816452,
"max": 0.11548515409231186,
"mean": 0.0011954698711633682,
"std": 0.024709828197956085,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37493425607681274,
"max": 0.9319035410881042,
"mean": 0.7510924339294434,
"std": 0.0401909314095974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27919864654541016,
"max": 0.273176908493042,
"mean": -0.0001684028684394434,
"std": 0.041004277765750885,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19848693907260895,
"max": 0.05126062035560608,
"mean": -0.032024383544921875,
"std": 0.025078732520341873,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6584433317184448,
"max": 0.5357221961021423,
"mean": -4.880438791587949e-05,
"std": 0.05285734310746193,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19274669885635376,
"max": 0.5823217630386353,
"mean": -0.0005133696831762791,
"std": 0.041087545454502106,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.4175601005554199,
"max": 0.37188875675201416,
"mean": 6.479064722952899e-06,
"std": 0.021628154441714287,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.2145100235939026,
"max": 0.7467755675315857,
"mean": 0.6495225429534912,
"std": 0.054342612624168396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20954373478889465,
"max": 0.19555190205574036,
"mean": 4.0139111661119387e-05,
"std": 0.03946155682206154,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32948848605155945,
"max": 0.2595402002334595,
"mean": -0.0032335962168872356,
"std": 0.05627242103219032,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2058991640806198,
"max": 0.2547155022621155,
"mean": 5.40805995115079e-05,
"std": 0.03856402263045311,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.243993759155273,
"max": 6.932845115661621,
"mean": 0.048340216279029846,
"std": 1.385199785232544,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20978908240795135,
"max": 0.23056426644325256,
"mean": -4.742521468870109e-06,
"std": 0.04131828248500824,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04378769174218178,
"max": 0.0359850712120533,
"mean": -6.261238013394177e-06,
"std": 0.012797025963664055,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39764100313186646,
"max": 0.34504374861717224,
"mean": -5.53192148800008e-05,
"std": 0.0423952080309391,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.05508939549326897,
"max": 0.06280933320522308,
"mean": 0.0003585501981433481,
"std": 0.018675601109862328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3507746756076813,
"max": 1.0452601909637451,
"mean": 0.7896535992622375,
"std": 0.04874108359217644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3336845338344574,
"max": 0.38642778992652893,
"mean": -0.00016908602265175432,
"std": 0.041490186005830765,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.1574612259864807,
"max": 0.05922037363052368,
"mean": -0.03182276338338852,
"std": 0.025103161111474037,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6963140964508057,
"max": 0.46921107172966003,
"mean": -8.656673162477091e-05,
"std": 0.05180606618523598,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24794527888298035,
"max": 0.3287939429283142,
"mean": -0.00025959889171645045,
"std": 0.04145469143986702,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.28705933690071106,
"max": 0.3503926694393158,
"mean": -2.8700230814138195e-06,
"std": 0.024241898208856583,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19675415754318237,
"max": 0.7791337370872498,
"mean": 0.6702517867088318,
"std": 0.05866968631744385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22908443212509155,
"max": 0.2313445806503296,
"mean": -2.062591738649644e-05,
"std": 0.040440406650304794,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22002485394477844,
"max": 0.24098847806453705,
"mean": 0.00078444869723171,
"std": 0.0558483712375164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21667493879795074,
"max": 0.22645404934883118,
"mean": -7.211311458377168e-05,
"std": 0.03937484323978424,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.906242370605469,
"max": 9.069114685058594,
"mean": -0.0012534279376268387,
"std": 1.8484383821487427,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2695206105709076,
"max": 0.2589607834815979,
"mean": 4.368612644611858e-05,
"std": 0.03841120004653931,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05792244151234627,
"max": 0.05800376832485199,
"mean": 0.0003531992551870644,
"std": 0.014716269448399544,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2641335129737854,
"max": 0.2883334755897522,
"mean": -6.170988490339369e-05,
"std": 0.03907797113060951,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.043938618153333664,
"max": 0.037385016679763794,
"mean": -9.84332655207254e-05,
"std": 0.013347743079066277,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3393842577934265,
"max": 1.0925544500350952,
"mean": 0.8639589548110962,
"std": 0.0638754740357399,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4231780469417572,
"max": 0.41907352209091187,
"mean": 0.0003135594888590276,
"std": 0.04351302981376648,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21478679776191711,
"max": 0.1706700474023819,
"mean": -0.02944377437233925,
"std": 0.03187936916947365,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5987504720687866,
"max": 0.5598719120025635,
"mean": -0.00014867217396385968,
"std": 0.05346066504716873,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17880699038505554,
"max": 0.37724727392196655,
"mean": 0.0013524596579372883,
"std": 0.037310197949409485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39442750811576843,
"max": 0.3689110279083252,
"mean": 3.764010398299433e-05,
"std": 0.028617940843105316,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.29055094718933105,
"max": 0.8275657296180725,
"mean": 0.7055599689483643,
"std": 0.06785259395837784,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9265406131744385,
"max": 1.0269172191619873,
"mean": -2.7786163627752103e-05,
"std": 0.04764207825064659,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8793070316314697,
"max": 0.8158283829689026,
"mean": -0.0003010375367011875,
"std": 0.09555298835039139,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.26992541551589966,
"max": 0.24092742800712585,
"mean": -2.246434632979799e-05,
"std": 0.03895093873143196,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.743555068969727,
"max": 22.852014541625977,
"mean": -0.09188304841518402,
"std": 4.070625305175781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22777004539966583,
"max": 0.2455480843782425,
"mean": -2.5490313419140875e-05,
"std": 0.03864210844039917,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.060185808688402176,
"max": 0.04548603296279907,
"mean": -0.00013778329594060779,
"std": 0.014688468538224697,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33804869651794434,
"max": 0.3748103082180023,
"mean": 7.576927600894123e-06,
"std": 0.04082098975777626,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.046251166611909866,
"max": 0.19543442130088806,
"mean": 0.00027753060567192733,
"std": 0.013553835451602936,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37363529205322266,
"max": 1.1304537057876587,
"mean": 0.8902342319488525,
"std": 0.06401188671588898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44750913977622986,
"max": 0.5426135659217834,
"mean": 2.5048013412742876e-05,
"std": 0.0455806739628315,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22384138405323029,
"max": 0.08764129132032394,
"mean": -0.03201291710138321,
"std": 0.03774724155664444,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7260749936103821,
"max": 0.688654899597168,
"mean": 3.5635155654745176e-05,
"std": 0.051793280988931656,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17447420954704285,
"max": 0.21816052496433258,
"mean": 3.443963942117989e-05,
"std": 0.03176717460155487,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.33968257904052734,
"max": 0.3729552924633026,
"mean": 4.328345676185563e-05,
"std": 0.034136127680540085,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3178211450576782,
"max": 1.2872322797775269,
"mean": 0.6015591025352478,
"std": 0.08348726481199265,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.28302425146102905,
"max": 0.26023271679878235,
"mean": -2.7253747703070985e-06,
"std": 0.0359804667532444,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23563744127750397,
"max": 0.20571035146713257,
"mean": 0.00023820970091037452,
"std": 0.056028686463832855,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43542271852493286,
"max": 0.3249562382698059,
"mean": 2.4268334527732804e-05,
"std": 0.034124359488487244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.546493053436279,
"max": 7.314059257507324,
"mean": -0.007369840517640114,
"std": 0.6993855834007263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34410950541496277,
"max": 0.36279547214508057,
"mean": 0.0001030894200084731,
"std": 0.04783707857131958,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07371430099010468,
"max": 0.060424793511629105,
"mean": 0.0009352926863357425,
"std": 0.01493847742676735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2562869191169739,
"max": 0.2867131233215332,
"mean": 4.736550181405619e-06,
"std": 0.04156505689024925,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.0553305447101593,
"max": 0.06281695514917374,
"mean": 0.00012849000631831586,
"std": 0.007162065710872412,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49391981959342957,
"max": 1.220736026763916,
"mean": 1.0135732889175415,
"std": 0.11749263107776642,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939218997955322,
"max": 1.0474658012390137,
"mean": -4.883138171862811e-05,
"std": 0.05241798609495163,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.223901629447937,
"max": 0.17314252257347107,
"mean": -0.027228916063904762,
"std": 0.03630804270505905,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8840344548225403,
"max": 0.9224310517311096,
"mean": -0.00014670705422759056,
"std": 0.053297851234674454,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17102152109146118,
"max": 0.3797409236431122,
"mean": 0.003368864767253399,
"std": 0.0398765504360199,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7776780724525452,
"max": 0.7227001190185547,
"mean": 1.787853761925362e-05,
"std": 0.04615465924143791,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3386647403240204,
"max": 1.4281901121139526,
"mean": 0.9484964609146118,
"std": 0.20680245757102966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.745869517326355,
"max": 1.7045400142669678,
"mean": 0.00022709640325047076,
"std": 0.15870508551597595,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1994972229003906,
"max": 1.1010137796401978,
"mean": -0.009549295529723167,
"std": 0.20389875769615173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4210166335105896,
"max": 0.4279645085334778,
"mean": 6.39720747130923e-05,
"std": 0.04802015796303749,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.747936248779297,
"max": 19.543052673339844,
"mean": -0.24834343791007996,
"std": 4.777070999145508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3238843083381653,
"max": 0.4385298192501068,
"mean": -1.1759563676605467e-05,
"std": 0.04616716504096985,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03387872874736786,
"max": 0.036932073533535004,
"mean": 0.0006410478381440043,
"std": 0.01291597355157137,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7035592198371887,
"max": 0.6685189604759216,
"mean": 4.281650763005018e-05,
"std": 0.05789238214492798,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07232622057199478,
"max": 0.06769084185361862,
"mean": -0.00013414367276709527,
"std": 0.012906934134662151,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3805098831653595,
"max": 1.3928314447402954,
"mean": 1.0667389631271362,
"std": 0.21977593004703522,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6165266633033752,
"max": 0.7183749079704285,
"mean": 0.00011245780478930101,
"std": 0.05802787095308304,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21882832050323486,
"max": 0.2250150591135025,
"mean": 0.006199384108185768,
"std": 0.049713458865880966,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6297744512557983,
"max": 0.8895941972732544,
"mean": 1.2031738151563331e-05,
"std": 0.023544643074274063,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.506857693195343,
"max": 0.47375017404556274,
"mean": -0.003018573159351945,
"std": 0.06925369799137115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5381409525871277,
"max": 1.1801701784133911,
"mean": 0.7828266620635986,
"std": 0.09875727444887161,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2670648992061615,
"max": 0.21295404434204102,
"mean": -0.0002240903995698318,
"std": 0.054007235914468765,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23832593858242035,
"max": 0.014832383021712303,
"mean": -0.043932899832725525,
"std": 0.03429204970598221,
"sparsity": 0.0,
"shape": [
100
]
}
}
}