zombievip11 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
207045a verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.4302472174167633,
"max": 0.2981015741825104,
"mean": -0.0025541300419718027,
"std": 0.04255979508161545,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06310182064771652,
"max": 0.10759169608354568,
"mean": 0.0006188107072375715,
"std": 0.03408230096101761,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4127681851387024,
"max": 0.8368753790855408,
"mean": -0.00020183739252388477,
"std": 0.024111691862344742,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11528493463993073,
"max": 0.32169410586357117,
"mean": -0.0009411157225258648,
"std": 0.019568322226405144,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7921671867370605,
"max": 2.8708858489990234,
"mean": -0.00036475385422818363,
"std": 0.6154695153236389,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.279247909784317,
"max": 0.3815617561340332,
"mean": 0.0004244846059009433,
"std": 0.04274849221110344,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22255778312683105,
"max": 0.2097877562046051,
"mean": -0.00448887562379241,
"std": 0.040919456630945206,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42842426896095276,
"max": 0.47603461146354675,
"mean": 3.9225278669619e-06,
"std": 0.024510197341442108,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.3252594470977783,
"max": 0.1568366438150406,
"mean": -0.04670371487736702,
"std": 0.05158696323633194,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.41043803095817566,
"max": 0.3547053635120392,
"mean": -0.00013071295688860118,
"std": 0.023602206259965897,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22980256378650665,
"max": 0.26275309920310974,
"mean": -0.02913004904985428,
"std": 0.04934975132346153,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.25458577275276184,
"max": 0.8201687335968018,
"mean": 0.5254767537117004,
"std": 0.08081887662410736,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2970397174358368,
"max": 0.2657235562801361,
"mean": -0.00042574311373755336,
"std": 0.03210281580686569,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09289710968732834,
"max": 0.1248435452580452,
"mean": 0.0006472540553659201,
"std": 0.025739869102835655,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2908317744731903,
"max": 0.2814251184463501,
"mean": -7.539847865700722e-05,
"std": 0.030931154265999794,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.900259494781494,
"max": 5.815035820007324,
"mean": -0.009333062916994095,
"std": 1.2956619262695312,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42508748173713684,
"max": 0.3436461091041565,
"mean": 9.804315777728334e-05,
"std": 0.029953401535749435,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028917992487549782,
"max": 0.027773840352892876,
"mean": -0.00031790570938028395,
"std": 0.012571282684803009,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4539951980113983,
"max": 0.44834843277931213,
"mean": 2.359610516577959e-05,
"std": 0.02385314740240574,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08872788399457932,
"max": 0.0911579355597496,
"mean": 0.0022788788191974163,
"std": 0.01951882243156433,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26684099435806274,
"max": 1.056283712387085,
"mean": 0.5311816930770874,
"std": 0.10443845391273499,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5745589733123779,
"max": 0.608278751373291,
"mean": -0.0004312347446102649,
"std": 0.03859887644648552,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18254584074020386,
"max": 0.04550725594162941,
"mean": -0.02946603111922741,
"std": 0.042608592659235,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1672769784927368,
"max": 1.6339865922927856,
"mean": 0.0003258037322666496,
"std": 0.027695847675204277,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16238044202327728,
"max": 0.205756276845932,
"mean": -0.021133966743946075,
"std": 0.02794249914586544,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.2238895148038864,
"max": 0.8438186645507812,
"mean": 0.48762065172195435,
"std": 0.07522151619195938,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2554994523525238,
"max": 0.30581825971603394,
"mean": -6.700396625092253e-06,
"std": 0.03347325325012207,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09536930173635483,
"max": 0.11054016649723053,
"mean": 6.769842002540827e-05,
"std": 0.026959657669067383,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2971096336841583,
"max": 0.2961491346359253,
"mean": 5.292622518027201e-05,
"std": 0.03254416957497597,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.165089130401611,
"max": 5.085312843322754,
"mean": -0.01459675282239914,
"std": 1.1575658321380615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34498170018196106,
"max": 0.3433385491371155,
"mean": 7.90221311035566e-05,
"std": 0.03006155788898468,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03615141659975052,
"max": 0.03325657546520233,
"mean": -0.00014247104991227388,
"std": 0.01303154043853283,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3154723644256592,
"max": 0.37497249245643616,
"mean": -2.0466719433898106e-05,
"std": 0.02405875362455845,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10546914488077164,
"max": 0.12202588468790054,
"mean": -0.0019681837875396013,
"std": 0.028853828087449074,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3114376366138458,
"max": 1.12091863155365,
"mean": 0.6662803292274475,
"std": 0.09775093197822571,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8727247714996338,
"max": 0.6275021433830261,
"mean": 0.001675525214523077,
"std": 0.047438088804483414,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2714252769947052,
"max": 0.03427727520465851,
"mean": -0.04661863297224045,
"std": 0.04059664160013199,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9225524067878723,
"max": 0.9647303223609924,
"mean": 0.0010189020540565252,
"std": 0.04070537909865379,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.1445721685886383,
"max": 0.07502365112304688,
"mean": -0.009085974656045437,
"std": 0.02569437585771084,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.24001570045948029,
"max": 0.7130113244056702,
"mean": 0.44724389910697937,
"std": 0.059336330741643906,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27252721786499023,
"max": 0.2977474629878998,
"mean": 9.076926289708354e-06,
"std": 0.03546866402029991,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11933133751153946,
"max": 0.11861857026815414,
"mean": 0.000759843154810369,
"std": 0.027626313269138336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2810227572917938,
"max": 0.2797848582267761,
"mean": -7.693594670854509e-05,
"std": 0.03509817644953728,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5099942684173584,
"max": 2.5219902992248535,
"mean": 0.026751244440674782,
"std": 0.5868741273880005,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2210882604122162,
"max": 0.27153223752975464,
"mean": 2.4560677047702484e-06,
"std": 0.030732404440641403,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.0335114523768425,
"max": 0.031222868710756302,
"mean": 0.00011844941036542878,
"std": 0.01240864023566246,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23524264991283417,
"max": 0.23183144629001617,
"mean": 5.6907440011855215e-05,
"std": 0.025696856901049614,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13587476313114166,
"max": 0.12763848900794983,
"mean": -0.005494903773069382,
"std": 0.039958395063877106,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.35451608896255493,
"max": 1.1720539331436157,
"mean": 0.7106262445449829,
"std": 0.10376716405153275,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6174105405807495,
"max": 0.5545085072517395,
"mean": 0.0011598969576880336,
"std": 0.04611882567405701,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.1878771334886551,
"max": 0.024924062192440033,
"mean": -0.0348367840051651,
"std": 0.028611591085791588,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1316187381744385,
"max": 0.971271812915802,
"mean": 0.0003585200756788254,
"std": 0.0423467643558979,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5980486869812012,
"max": 0.06288419663906097,
"mean": -0.0048779072239995,
"std": 0.028619417920708656,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3752831816673279,
"max": 0.9404632449150085,
"mean": 0.5925332307815552,
"std": 0.0669492781162262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3914392590522766,
"max": 0.36907821893692017,
"mean": 7.118703797459602e-05,
"std": 0.03718792647123337,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11911813914775848,
"max": 0.1366533637046814,
"mean": 0.0009285138221457601,
"std": 0.029234997928142548,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6193273067474365,
"max": 0.5089406967163086,
"mean": 1.5145867109822575e-05,
"std": 0.036441244184970856,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.18839168548584,
"max": 8.790501594543457,
"mean": -0.1092919334769249,
"std": 1.6991198062896729,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27663567662239075,
"max": 0.23973354697227478,
"mean": 5.2983978093834594e-05,
"std": 0.032615091651678085,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05204087495803833,
"max": 0.03958116099238396,
"mean": 9.567412780597806e-05,
"std": 0.012961393222212791,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23071666061878204,
"max": 0.234710693359375,
"mean": -2.1666935936082155e-05,
"std": 0.029391352087259293,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20436595380306244,
"max": 0.10555993020534515,
"mean": -0.004022484645247459,
"std": 0.032626353204250336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3398347795009613,
"max": 1.0127081871032715,
"mean": 0.7008411884307861,
"std": 0.09675740450620651,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5648741126060486,
"max": 0.8332529664039612,
"mean": 0.00041526954737491906,
"std": 0.04230087623000145,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2118305265903473,
"max": 0.030412573367357254,
"mean": -0.032187312841415405,
"std": 0.026507310569286346,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7544606924057007,
"max": 0.718633234500885,
"mean": -1.3493583537638187e-05,
"std": 0.03684115782380104,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.26357486844062805,
"max": 0.10591558367013931,
"mean": -0.0030233184807002544,
"std": 0.028867946937680244,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2842615246772766,
"max": 0.6951268911361694,
"mean": 0.4995192289352417,
"std": 0.04653889685869217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.2790677845478058,
"max": 0.2343253642320633,
"mean": -0.00011120391718577594,
"std": 0.03876161575317383,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15418817102909088,
"max": 0.12667444348335266,
"mean": -0.0022305608727037907,
"std": 0.033373840153217316,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.4139367640018463,
"max": 0.660070538520813,
"mean": -1.9737122784135863e-05,
"std": 0.03909851238131523,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.238705635070801,
"max": 4.723268985748291,
"mean": -0.020462416112422943,
"std": 1.0078494548797607,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24497364461421967,
"max": 0.20763254165649414,
"mean": 4.4202079152455553e-05,
"std": 0.033965613692998886,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03459359332919121,
"max": 0.04478804022073746,
"mean": -2.136104740202427e-05,
"std": 0.012631777673959732,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20072373747825623,
"max": 0.20615817606449127,
"mean": -2.975538700411562e-05,
"std": 0.031023193150758743,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19997990131378174,
"max": 0.11331257969141006,
"mean": -0.0029115378856658936,
"std": 0.03451942652463913,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36702099442481995,
"max": 1.0571231842041016,
"mean": 0.6706027388572693,
"std": 0.06639590114355087,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.3983962833881378,
"max": 0.5022679567337036,
"mean": -3.846201434498653e-05,
"std": 0.04113590717315674,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12814512848854065,
"max": 0.02683641016483307,
"mean": -0.03054228238761425,
"std": 0.02187994495034218,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44913211464881897,
"max": 0.433132529258728,
"mean": 7.945985271362588e-05,
"std": 0.0348953977227211,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2676845192909241,
"max": 0.0728912353515625,
"mean": -0.0011024216655641794,
"std": 0.023127950727939606,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.2873011827468872,
"max": 0.6852278709411621,
"mean": 0.5245736837387085,
"std": 0.047536205500364304,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22235900163650513,
"max": 0.2234368920326233,
"mean": 1.5712306776549667e-05,
"std": 0.0389518178999424,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13648226857185364,
"max": 0.10937032103538513,
"mean": 0.00023500403040088713,
"std": 0.02922363579273224,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.3750153183937073,
"max": 0.4373463988304138,
"mean": -9.542611223878339e-06,
"std": 0.03928782045841217,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8463687896728516,
"max": 5.000114917755127,
"mean": 0.00974472425878048,
"std": 0.8453519344329834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22320200502872467,
"max": 0.2200344353914261,
"mean": -1.8790160538628697e-07,
"std": 0.03441300988197327,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04361514747142792,
"max": 0.03597420081496239,
"mean": -0.0002564755268394947,
"std": 0.01208114717155695,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21329528093338013,
"max": 0.1889103502035141,
"mean": -1.6649610188324004e-05,
"std": 0.031539641320705414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18086224794387817,
"max": 0.12070237100124359,
"mean": -0.002405309583991766,
"std": 0.041269298642873764,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4225497245788574,
"max": 0.9420632123947144,
"mean": 0.6627737283706665,
"std": 0.056812334805727005,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3715151250362396,
"max": 0.4758515954017639,
"mean": -8.248311496572569e-05,
"std": 0.040895167738199234,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20838980376720428,
"max": 0.027207521721720695,
"mean": -0.030246354639530182,
"std": 0.02134900726377964,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3401075303554535,
"max": 0.7336291074752808,
"mean": 8.389431604882702e-05,
"std": 0.034770816564559937,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.24028635025024414,
"max": 0.05047708749771118,
"mean": -0.001194795360788703,
"std": 0.020465141162276268,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.30595850944519043,
"max": 0.6537705063819885,
"mean": 0.5251566767692566,
"std": 0.04612725228071213,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30432915687561035,
"max": 0.21739104390144348,
"mean": 6.996125739533454e-05,
"std": 0.03949799761176109,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14943227171897888,
"max": 0.13134317100048065,
"mean": 0.00034546080860309303,
"std": 0.030460603535175323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25738975405693054,
"max": 0.20207944512367249,
"mean": 3.1017469154903665e-05,
"std": 0.03948727250099182,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.336665153503418,
"max": 2.376288890838623,
"mean": -0.026247629895806313,
"std": 0.44984105229377747,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18894457817077637,
"max": 0.21059554815292358,
"mean": 3.7193480238784105e-05,
"std": 0.034797847270965576,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03164611756801605,
"max": 0.03540992736816406,
"mean": -0.00020107367890886962,
"std": 0.012292974628508091,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18840090930461884,
"max": 0.17046599090099335,
"mean": -6.797777314204723e-05,
"std": 0.032174453139305115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13930758833885193,
"max": 0.13733482360839844,
"mean": -0.002516954904422164,
"std": 0.05130286514759064,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.46718037128448486,
"max": 0.9563874006271362,
"mean": 0.6689748764038086,
"std": 0.05278700590133667,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3242974579334259,
"max": 0.3098086714744568,
"mean": -1.3617936929222196e-06,
"std": 0.04095118120312691,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12469282001256943,
"max": 0.02526070550084114,
"mean": -0.030708763748407364,
"std": 0.019816862419247627,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.4401431083679199,
"max": 0.44523754715919495,
"mean": 9.650168067310005e-05,
"std": 0.03512365743517876,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22469638288021088,
"max": 0.05176383629441261,
"mean": -0.0011855906341224909,
"std": 0.018477564677596092,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3391278088092804,
"max": 0.7394291162490845,
"mean": 0.5587280988693237,
"std": 0.04140337556600571,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27262935042381287,
"max": 0.2784675061702728,
"mean": 1.984157097467687e-05,
"std": 0.041061654686927795,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.1370200663805008,
"max": 0.13985797762870789,
"mean": 0.0004876606981270015,
"std": 0.026632333174347878,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49073120951652527,
"max": 0.35599616169929504,
"mean": 8.872401667758822e-05,
"std": 0.040699537843465805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2974724769592285,
"max": 1.7454196214675903,
"mean": -0.021081820130348206,
"std": 0.5002042055130005,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21770520508289337,
"max": 0.19793029129505157,
"mean": -4.0488688682671636e-05,
"std": 0.03423655033111572,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.0412483848631382,
"max": 0.038579147309064865,
"mean": -0.00014048503362573683,
"std": 0.012878325767815113,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17742925882339478,
"max": 0.1836576759815216,
"mean": 4.762586468132213e-05,
"std": 0.031559526920318604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.17993344366550446,
"max": 0.18376585841178894,
"mean": -0.0022200806997716427,
"std": 0.05484066903591156,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742898643016815,
"max": 1.0256999731063843,
"mean": 0.6453396677970886,
"std": 0.05035531893372536,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27185067534446716,
"max": 0.3093453645706177,
"mean": 0.00011244456982240081,
"std": 0.040687281638383865,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10582341998815536,
"max": 0.02690320834517479,
"mean": -0.02951919659972191,
"std": 0.017931465059518814,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3390185832977295,
"max": 0.32922977209091187,
"mean": 5.620906449621543e-05,
"std": 0.034417424350976944,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18173733353614807,
"max": 0.04227666184306145,
"mean": -0.0010707223555073142,
"std": 0.017213771119713783,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3254404067993164,
"max": 0.6867184638977051,
"mean": 0.5112515091896057,
"std": 0.036953605711460114,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23387184739112854,
"max": 0.22577211260795593,
"mean": -3.611366992117837e-05,
"std": 0.039180755615234375,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11533147841691971,
"max": 0.13174240291118622,
"mean": 0.00015339103993028402,
"std": 0.029181061312556267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3528231382369995,
"max": 0.28539976477622986,
"mean": 7.355230991379358e-06,
"std": 0.03924909234046936,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.133138179779053,
"max": 3.544285774230957,
"mean": -0.011592379771173,
"std": 0.682723343372345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.211366206407547,
"max": 0.20918519794940948,
"mean": 3.47092718584463e-05,
"std": 0.03448852524161339,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03565165773034096,
"max": 0.04795990511775017,
"mean": 0.0007935892790555954,
"std": 0.012854626402258873,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21064111590385437,
"max": 0.1932363212108612,
"mean": -1.2698478712991346e-06,
"std": 0.03169921413064003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18659119307994843,
"max": 0.17711447179317474,
"mean": -0.0028428896330296993,
"std": 0.05864271521568298,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746437668800354,
"max": 1.0418283939361572,
"mean": 0.6514592170715332,
"std": 0.049664221704006195,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24862074851989746,
"max": 0.3290244936943054,
"mean": 0.0001805826323106885,
"std": 0.04057569056749344,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12504367530345917,
"max": 0.024559227749705315,
"mean": -0.030504360795021057,
"std": 0.017604367807507515,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42111000418663025,
"max": 0.4816901385784149,
"mean": -1.4580382412532344e-07,
"std": 0.03540220111608505,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15185561776161194,
"max": 0.04354217275977135,
"mean": 4.59605835203547e-05,
"std": 0.014884229749441147,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3155389130115509,
"max": 0.6820871829986572,
"mean": 0.5529488921165466,
"std": 0.04071735590696335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20643381774425507,
"max": 0.21991202235221863,
"mean": 3.090859536314383e-05,
"std": 0.03830238804221153,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13782010972499847,
"max": 0.11272551119327545,
"mean": 1.9601531676016748e-05,
"std": 0.025822695344686508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40278956294059753,
"max": 0.37109923362731934,
"mean": 2.618670441734139e-05,
"std": 0.03818415477871895,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7713959217071533,
"max": 2.8690977096557617,
"mean": 0.0011573811061680317,
"std": 0.5169072151184082,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20292331278324127,
"max": 0.1974206268787384,
"mean": 2.9524358978960663e-05,
"std": 0.03429995849728584,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05099167302250862,
"max": 0.040043603628873825,
"mean": -0.00041941594099625945,
"std": 0.01342028472572565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19649569690227509,
"max": 0.20179419219493866,
"mean": -1.231730857398361e-05,
"std": 0.031807754188776016,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19327867031097412,
"max": 0.195101797580719,
"mean": -0.002969510853290558,
"std": 0.06256763637065887,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3495103716850281,
"max": 1.0841096639633179,
"mean": 0.6672286987304688,
"std": 0.055231790989637375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22513826191425323,
"max": 0.25143498182296753,
"mean": 0.00035896283225156367,
"std": 0.040764935314655304,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.0910005122423172,
"max": 0.043744608759880066,
"mean": -0.030088767409324646,
"std": 0.017610033974051476,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.3535524308681488,
"max": 0.30403411388397217,
"mean": -4.383287887321785e-05,
"std": 0.03712723031640053,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16203957796096802,
"max": 0.063482366502285,
"mean": -8.168067142833024e-05,
"std": 0.019403086975216866,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.3487982153892517,
"max": 0.7220908999443054,
"mean": 0.542417049407959,
"std": 0.039066411554813385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.2193998396396637,
"max": 0.22306619584560394,
"mean": -1.1200094377272762e-05,
"std": 0.039234746247529984,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.118410125374794,
"max": 0.17068907618522644,
"mean": 0.00027954723918810487,
"std": 0.02511775679886341,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.2468089461326599,
"max": 0.3010835647583008,
"mean": -3.6559536965796724e-05,
"std": 0.03893429413437843,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5055222511291504,
"max": 3.714968204498291,
"mean": 0.015851959586143494,
"std": 0.7825093269348145,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21874836087226868,
"max": 0.2377166897058487,
"mean": -1.354666437691776e-05,
"std": 0.036306966096162796,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04711933806538582,
"max": 0.051407281309366226,
"mean": 0.0004819422902073711,
"std": 0.013517641462385654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21396194398403168,
"max": 0.2176503837108612,
"mean": 5.661203613271937e-05,
"std": 0.033618949353694916,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21143636107444763,
"max": 0.23150545358657837,
"mean": -0.0051071615889668465,
"std": 0.061890047043561935,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36224639415740967,
"max": 1.1013858318328857,
"mean": 0.6993460655212402,
"std": 0.053608398884534836,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23459650576114655,
"max": 0.2449653446674347,
"mean": 0.00046337299863807857,
"std": 0.04127378761768341,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09813369810581207,
"max": 0.06841138750314713,
"mean": -0.03143805265426636,
"std": 0.018124299123883247,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3017565906047821,
"max": 0.35157960653305054,
"mean": -8.145418541971594e-05,
"std": 0.04027964174747467,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15233245491981506,
"max": 0.1496550738811493,
"mean": 0.0002547369513195008,
"std": 0.023038377985358238,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.999387264251709,
"max": 1.0017390251159668,
"mean": 1.0002288818359375,
"std": 0.0006608659168705344,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.03126532956957817,
"max": 0.03126157820224762,
"mean": -1.929386235133279e-05,
"std": 0.01804366707801819,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031232358887791634,
"max": 0.030991962179541588,
"mean": -0.0010843182681128383,
"std": 0.01795327477157116,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031262245029211044,
"max": 0.031266022473573685,
"mean": 3.54884014086565e-06,
"std": 0.01804407499730587,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.03115827776491642,
"max": 0.031178638339042664,
"mean": 0.00033397332299500704,
"std": 0.01806548982858658,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0002825965639203787,
"max": 0.0002991823712363839,
"mean": 9.51684285155352e-07,
"std": 8.538085967302322e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9994979500770569,
"max": 1.0022096633911133,
"mean": 1.0004006624221802,
"std": 0.0006605891394428909,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03165094926953316,
"max": 0.03164109215140343,
"mean": -8.348271876457147e-06,
"std": 0.018046928569674492,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03134633228182793,
"max": 0.031511712819337845,
"mean": 0.00030681173666380346,
"std": 0.018000956624746323,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.00043215902405790985,
"max": 0.00046604761155322194,
"mean": 6.842553190722356e-09,
"std": 8.495857764501125e-05,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.00026889159926213324,
"max": 0.0002754697925411165,
"mean": -3.8592878581766854e-07,
"std": 8.52422381285578e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3829193115234375,
"max": 0.7194843292236328,
"mean": 0.5807508826255798,
"std": 0.03887004032731056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23809631168842316,
"max": 0.1965617835521698,
"mean": 2.6561519916867837e-05,
"std": 0.03746955841779709,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11867669969797134,
"max": 0.1661195158958435,
"mean": 0.0009914024267345667,
"std": 0.02754930779337883,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.246256485581398,
"max": 0.5006742477416992,
"mean": -5.049802712164819e-05,
"std": 0.03762722760438919,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.9423649311065674,
"max": 3.7695066928863525,
"mean": -0.003572166431695223,
"std": 0.6814473271369934,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22738605737686157,
"max": 0.2515488564968109,
"mean": -1.1636337148956954e-05,
"std": 0.03743850067257881,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07163971662521362,
"max": 0.08085085451602936,
"mean": -0.0005172090604901314,
"std": 0.015671856701374054,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22821125388145447,
"max": 0.25809258222579956,
"mean": -2.8563266823766753e-05,
"std": 0.03542532026767731,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20053939521312714,
"max": 0.2151157110929489,
"mean": -0.005536144133657217,
"std": 0.06835491210222244,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4050634801387787,
"max": 1.1895967721939087,
"mean": 0.7380250096321106,
"std": 0.055244140326976776,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22111129760742188,
"max": 0.24610112607479095,
"mean": 0.0005211608950048685,
"std": 0.04134161397814751,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.1035081148147583,
"max": 0.02415246143937111,
"mean": -0.03267139568924904,
"std": 0.0188875924795866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4493615925312042,
"max": 0.4224270284175873,
"mean": -0.00043286356958560646,
"std": 0.046902477741241455,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25133273005485535,
"max": 0.47000864148139954,
"mean": 0.003200301667675376,
"std": 0.04454173892736435,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.31721800565719604,
"max": 0.3333887755870819,
"mean": -2.5312700017821044e-05,
"std": 0.021290434524416924,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3246031403541565,
"max": 0.6853436231613159,
"mean": 0.5710366368293762,
"std": 0.04471459612250328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16453500092029572,
"max": 0.1740685999393463,
"mean": -4.849593824474141e-05,
"std": 0.03318438306450844,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.1867130845785141,
"max": 0.14271552860736847,
"mean": 4.246922617312521e-05,
"std": 0.02968418225646019,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3805757164955139,
"max": 0.24612776935100555,
"mean": -9.95914979284862e-06,
"std": 0.03276544809341431,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.655998945236206,
"max": 3.29028582572937,
"mean": -0.014252795837819576,
"std": 0.9852345585823059,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23509258031845093,
"max": 0.24746716022491455,
"mean": -1.7896145436679944e-05,
"std": 0.041701558977365494,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.0727391242980957,
"max": 0.15445110201835632,
"mean": 0.0006684996769763529,
"std": 0.0251635629683733,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2665387690067291,
"max": 0.24852725863456726,
"mean": -1.545724444440566e-05,
"std": 0.040141962468624115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.1895304173231125,
"max": 0.1947212517261505,
"mean": -0.0012303038965910673,
"std": 0.06668580323457718,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32925331592559814,
"max": 0.9993983507156372,
"mean": 0.7192491888999939,
"std": 0.05233968794345856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23172250390052795,
"max": 0.24564699828624725,
"mean": 0.0001827301166485995,
"std": 0.040905360132455826,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11416275054216385,
"max": 0.01871776208281517,
"mean": -0.04247911646962166,
"std": 0.018855031579732895,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3899572193622589,
"max": 0.4073238670825958,
"mean": -2.1964835468679667e-05,
"std": 0.048539649695158005,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6931350827217102,
"max": 0.4125315248966217,
"mean": 0.0008539482369087636,
"std": 0.060291603207588196,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.00041267118649557233,
"max": 1.0002655982971191,
"mean": 0.00048818063805811107,
"std": 0.022091196849942207,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9994122982025146,
"max": 1.0017499923706055,
"mean": 1.000227689743042,
"std": 0.0006477160495705903,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.03126005083322525,
"max": 0.03126395121216774,
"mean": -2.102299185935408e-05,
"std": 0.018035007640719414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031219350174069405,
"max": 0.031236182898283005,
"mean": -0.0006771045736968517,
"std": 0.017829518765211105,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03126310929656029,
"max": 0.03126853331923485,
"mean": -8.832646017253865e-06,
"std": 0.018034033477306366,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.03123609907925129,
"max": 0.0312487930059433,
"mean": -0.0007298641721718013,
"std": 0.0179444570094347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00027907025651074946,
"max": 0.0002400849189143628,
"mean": 2.689231223484967e-06,
"std": 8.426107524428517e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9995393753051758,
"max": 1.00211501121521,
"mean": 1.0004167556762695,
"std": 0.0006692331517115235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.031639304012060165,
"max": 0.03170545771718025,
"mean": 2.9571647246484645e-06,
"std": 0.018044477328658104,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.031226763501763344,
"max": 0.03141167387366295,
"mean": 0.0003237017663195729,
"std": 0.018078280612826347,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.00040698132943362,
"max": 0.0004357137659098953,
"mean": 1.1018712484656135e-06,
"std": 8.384210377698764e-05,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0002671520342119038,
"max": 0.00023483953555114567,
"mean": 2.1393277620518347e-06,
"std": 8.360463834833354e-05,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23457324504852295,
"max": 0.2725456655025482,
"mean": 7.03098658050294e-06,
"std": 0.018811851739883423,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32138916850090027,
"max": 0.6936908960342407,
"mean": 0.5816767811775208,
"std": 0.04592788219451904,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18192121386528015,
"max": 0.19770289957523346,
"mean": -1.1671071661112364e-05,
"std": 0.033187344670295715,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16075590252876282,
"max": 0.12948612868785858,
"mean": -0.0010705746244639158,
"std": 0.03414509445428848,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3322606384754181,
"max": 0.3115905225276947,
"mean": -1.047878777171718e-05,
"std": 0.032237909734249115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.803586006164551,
"max": 8.763325691223145,
"mean": 0.09346922487020493,
"std": 1.6197253465652466,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23383009433746338,
"max": 0.241935133934021,
"mean": 4.1345643694512546e-05,
"std": 0.04086088761687279,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.0759628489613533,
"max": 0.06582564860582352,
"mean": 0.0004808574158232659,
"std": 0.01941247656941414,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24585530161857605,
"max": 0.23399215936660767,
"mean": -2.9465345505741425e-06,
"std": 0.03943563625216484,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16296693682670593,
"max": 0.16089047491550446,
"mean": 0.001630417536944151,
"std": 0.06527554988861084,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5569126605987549,
"max": 0.9438663125038147,
"mean": 0.7129403352737427,
"std": 0.04013482853770256,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.2285519540309906,
"max": 0.2551051676273346,
"mean": -4.54609798907768e-05,
"std": 0.040580034255981445,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1348046511411667,
"max": 0.022271839901804924,
"mean": -0.04135382920503616,
"std": 0.01838485151529312,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4216119349002838,
"max": 0.3923768699169159,
"mean": -4.429011823958717e-06,
"std": 0.047790225595235825,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6072338819503784,
"max": 0.651410698890686,
"mean": 0.0015874950913712382,
"std": 0.05684793367981911,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2518640160560608,
"max": 0.3208119571208954,
"mean": -6.068093171052169e-06,
"std": 0.019615380093455315,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35968896746635437,
"max": 0.6824969053268433,
"mean": 0.5707405805587769,
"std": 0.04298046976327896,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.2206181287765503,
"max": 0.177145317196846,
"mean": -3.474977711448446e-05,
"std": 0.034301795065402985,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16330677270889282,
"max": 0.2329079806804657,
"mean": 0.0003651169245131314,
"std": 0.032845281064510345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26389849185943604,
"max": 0.23990698158740997,
"mean": -5.2482428145594895e-05,
"std": 0.033900897949934006,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.855096817016602,
"max": 5.091324329376221,
"mean": 0.043882716447114944,
"std": 1.2292898893356323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24652959406375885,
"max": 0.25042256712913513,
"mean": 7.212234049802646e-05,
"std": 0.043991539627313614,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06257897615432739,
"max": 0.05448286980390549,
"mean": 0.0006493264227174222,
"std": 0.017185840755701065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2864921987056732,
"max": 0.2719077467918396,
"mean": -4.989763692719862e-05,
"std": 0.04298979416489601,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16073212027549744,
"max": 0.17026235163211823,
"mean": -0.0028884499333798885,
"std": 0.059281300753355026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5197049379348755,
"max": 0.9328829050064087,
"mean": 0.7135671973228455,
"std": 0.038414619863033295,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23812194168567657,
"max": 0.24923060834407806,
"mean": 0.0004647884052246809,
"std": 0.040460310876369476,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14453770220279694,
"max": 0.041513390839099884,
"mean": -0.039691261947155,
"std": 0.020545845851302147,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5327961444854736,
"max": 0.5830419063568115,
"mean": 6.150515218905639e-06,
"std": 0.04886715114116669,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5192174911499023,
"max": 0.493362694978714,
"mean": 0.002359903883188963,
"std": 0.05345294252038002,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2736135721206665,
"max": 0.31528207659721375,
"mean": 1.917778718052432e-06,
"std": 0.020052393898367882,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3661349415779114,
"max": 0.7114736437797546,
"mean": 0.5932135581970215,
"std": 0.045942142605781555,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.2110714167356491,
"max": 0.19956345856189728,
"mean": 3.0644099751953036e-05,
"std": 0.03486814722418785,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18721066415309906,
"max": 0.20390057563781738,
"mean": 0.0009557952871546149,
"std": 0.031514741480350494,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2895534336566925,
"max": 0.3397268056869507,
"mean": -4.745465412270278e-05,
"std": 0.034589096903800964,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.877371311187744,
"max": 3.3874666690826416,
"mean": 0.014458310790359974,
"std": 0.8584496378898621,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2243141233921051,
"max": 0.24994920194149017,
"mean": -4.160197022429202e-06,
"std": 0.04223477095365524,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055164966732263565,
"max": 0.046595554798841476,
"mean": -1.914352469611913e-05,
"std": 0.01584389992058277,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29299500584602356,
"max": 0.29091835021972656,
"mean": -7.332260793191381e-06,
"std": 0.041949693113565445,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12478315085172653,
"max": 0.2593647241592407,
"mean": -0.0032380004413425922,
"std": 0.05315803363919258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4563468396663666,
"max": 0.8445391654968262,
"mean": 0.7056366205215454,
"std": 0.03522425889968872,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5114319920539856,
"max": 0.34831947088241577,
"mean": 0.0003425391623750329,
"std": 0.04020523279905319,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18698035180568695,
"max": 0.0395214818418026,
"mean": -0.039389487355947495,
"std": 0.021351324394345284,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5443570613861084,
"max": 0.556300938129425,
"mean": -7.182909030234441e-05,
"std": 0.05074186250567436,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5117379426956177,
"max": 0.6643521785736084,
"mean": 0.002444902202114463,
"std": 0.04953118786215782,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3325459361076355,
"max": 0.26552852988243103,
"mean": 3.543416823958978e-06,
"std": 0.019390085712075233,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3221725821495056,
"max": 0.7663489580154419,
"mean": 0.6510671973228455,
"std": 0.045311350375413895,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24964848160743713,
"max": 0.21960312128067017,
"mean": -2.564733222243376e-06,
"std": 0.0365014374256134,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3271941542625427,
"max": 0.2872978150844574,
"mean": -0.0006782531854696572,
"std": 0.038559023290872574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3100341856479645,
"max": 0.36996597051620483,
"mean": 6.477468559751287e-05,
"std": 0.036241985857486725,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.717563152313232,
"max": 5.807804584503174,
"mean": 0.037958286702632904,
"std": 1.4132274389266968,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.2217635214328766,
"max": 0.20596979558467865,
"mean": -7.51121697248891e-05,
"std": 0.04249033331871033,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07755438983440399,
"max": 0.051571402698755264,
"mean": -0.0009240633808076382,
"std": 0.016407648101449013,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3310355246067047,
"max": 0.32923752069473267,
"mean": -4.983477538189618e-06,
"std": 0.04279704764485359,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2849341332912445,
"max": 0.11188604682683945,
"mean": -0.0012093198020011187,
"std": 0.04701279476284981,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4862992763519287,
"max": 0.8870015740394592,
"mean": 0.7375336289405823,
"std": 0.038240909576416016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3614071309566498,
"max": 0.2742360532283783,
"mean": 5.11927210027352e-05,
"std": 0.040651749819517136,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24774602055549622,
"max": 0.04635339602828026,
"mean": -0.03926930949091911,
"std": 0.02325906977057457,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6263424754142761,
"max": 0.5970045328140259,
"mean": -5.938729736953974e-05,
"std": 0.05312504991889,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7097107172012329,
"max": 0.26584240794181824,
"mean": 0.0009143413626588881,
"std": 0.051234155893325806,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3434857726097107,
"max": 0.30358248949050903,
"mean": 1.7036518329405226e-07,
"std": 0.019139336422085762,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3498973548412323,
"max": 0.782823920249939,
"mean": 0.6388742327690125,
"std": 0.04923625662922859,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20567476749420166,
"max": 0.20698602497577667,
"mean": -5.99086306465324e-05,
"std": 0.03769771382212639,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25861528515815735,
"max": 0.2681594491004944,
"mean": -0.00040319678373634815,
"std": 0.04461444541811943,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.354155570268631,
"max": 0.3225230574607849,
"mean": -7.215602636279073e-06,
"std": 0.03720592334866524,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.261765480041504,
"max": 4.204793453216553,
"mean": -0.026421742513775826,
"std": 1.0068086385726929,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23872113227844238,
"max": 0.24366846680641174,
"mean": -2.556562321842648e-05,
"std": 0.043214697390794754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.0623321607708931,
"max": 0.056722186505794525,
"mean": 0.0003460783918853849,
"std": 0.014153210446238518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.4375341534614563,
"max": 0.3737650513648987,
"mean": 1.4479240235232282e-05,
"std": 0.04412652924656868,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.0964576005935669,
"max": 0.1761614829301834,
"mean": -0.0006592521094717085,
"std": 0.035152681171894073,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4216594994068146,
"max": 1.0695232152938843,
"mean": 0.7485226988792419,
"std": 0.042068321257829666,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2659566104412079,
"max": 0.2967792749404907,
"mean": -7.885666127549484e-05,
"std": 0.04081219807267189,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18503932654857635,
"max": 0.04330001026391983,
"mean": -0.03681433945894241,
"std": 0.025581127032637596,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4577261507511139,
"max": 0.4869215786457062,
"mean": 4.5667507947655395e-05,
"std": 0.05421961098909378,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.286339670419693,
"max": 0.5517974495887756,
"mean": -0.0008834124309942126,
"std": 0.047834936529397964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2927553355693817,
"max": 0.32282471656799316,
"mean": 6.005510840623174e-06,
"std": 0.01997239701449871,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2911321222782135,
"max": 0.7601316571235657,
"mean": 0.6508502960205078,
"std": 0.052130550146102905,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24379833042621613,
"max": 0.26165705919265747,
"mean": -5.548093668039655e-06,
"std": 0.03961396589875221,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.267425537109375,
"max": 0.20018436014652252,
"mean": -0.0008745841332711279,
"std": 0.05175970122218132,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27216100692749023,
"max": 0.2537060081958771,
"mean": 4.9225500333704986e-06,
"std": 0.03871043771505356,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.965754508972168,
"max": 15.947580337524414,
"mean": 0.03322947770357132,
"std": 1.9892938137054443,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20684319734573364,
"max": 0.22589777410030365,
"mean": -7.25259305909276e-05,
"std": 0.040558259934186935,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06933015584945679,
"max": 0.06318464130163193,
"mean": 0.00015395943773910403,
"std": 0.014743377454578876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4654809832572937,
"max": 0.3203279674053192,
"mean": 1.985491326195188e-05,
"std": 0.0405937097966671,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06401513516902924,
"max": 0.11543548107147217,
"mean": 0.0011928649619221687,
"std": 0.024708228185772896,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37496218085289,
"max": 0.9319577217102051,
"mean": 0.7510663270950317,
"std": 0.04019522666931152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27932143211364746,
"max": 0.2732137441635132,
"mean": -0.00016841731849126518,
"std": 0.04100305214524269,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19859075546264648,
"max": 0.05119071155786514,
"mean": -0.032025426626205444,
"std": 0.02508244849741459,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6584441065788269,
"max": 0.5357497930526733,
"mean": -4.779139635502361e-05,
"std": 0.05285602807998657,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19279344379901886,
"max": 0.5823235511779785,
"mean": -0.0005150774959474802,
"std": 0.04108597710728645,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.417548805475235,
"max": 0.3718253970146179,
"mean": 6.455363291024696e-06,
"std": 0.021627577021718025,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.2144990712404251,
"max": 0.7469203472137451,
"mean": 0.6495254039764404,
"std": 0.054346147924661636,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.2095586657524109,
"max": 0.19582423567771912,
"mean": 4.027899194625206e-05,
"std": 0.039461854845285416,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3295177221298218,
"max": 0.25955715775489807,
"mean": -0.003232627874240279,
"std": 0.056272272020578384,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20599152147769928,
"max": 0.2547609806060791,
"mean": 5.4062355047790334e-05,
"std": 0.0385642908513546,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.2438578605651855,
"max": 6.932709217071533,
"mean": 0.0483400858938694,
"std": 1.3851662874221802,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.2099662721157074,
"max": 0.23050634562969208,
"mean": -4.679883659264306e-06,
"std": 0.04131751507520676,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04376063123345375,
"max": 0.03601124510169029,
"mean": -5.941561539657414e-06,
"std": 0.012793137691915035,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39767444133758545,
"max": 0.34496286511421204,
"mean": -5.524931111722253e-05,
"std": 0.04239441454410553,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055049996823072433,
"max": 0.06284762173891068,
"mean": 0.0003571161942090839,
"std": 0.018672263249754906,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35070401430130005,
"max": 1.045300006866455,
"mean": 0.7896326184272766,
"std": 0.04874366521835327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3336896300315857,
"max": 0.38648444414138794,
"mean": -0.00016903391224332154,
"std": 0.04148908331990242,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15745577216148376,
"max": 0.05912669003009796,
"mean": -0.03182134032249451,
"std": 0.02510516531765461,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6961610317230225,
"max": 0.46920138597488403,
"mean": -8.453470945823938e-05,
"std": 0.051804590970277786,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.2479942888021469,
"max": 0.32869523763656616,
"mean": -0.00026210874784737825,
"std": 0.04145258665084839,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2870560884475708,
"max": 0.3504050374031067,
"mean": -2.7076764581579482e-06,
"std": 0.024242233484983444,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19671642780303955,
"max": 0.779133677482605,
"mean": 0.6702357530593872,
"std": 0.058674510568380356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.2289534956216812,
"max": 0.23123182356357574,
"mean": -2.0453815523069352e-05,
"std": 0.040439117699861526,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22002340853214264,
"max": 0.24095596373081207,
"mean": 0.0007837469456717372,
"std": 0.05583859235048294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.2165220081806183,
"max": 0.22644749283790588,
"mean": -7.203388668131083e-05,
"std": 0.03937385976314545,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.905970573425293,
"max": 9.068842887878418,
"mean": -0.001253342255949974,
"std": 1.848394513130188,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2694862186908722,
"max": 0.2589434087276459,
"mean": 4.364973574411124e-05,
"std": 0.038410402834415436,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05793758109211922,
"max": 0.05797392502427101,
"mean": 0.0003538080782163888,
"std": 0.01471701916307211,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26422733068466187,
"max": 0.28839007019996643,
"mean": -6.168079562485218e-05,
"std": 0.039077237248420715,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04391145706176758,
"max": 0.03739985078573227,
"mean": -9.783620771486312e-05,
"std": 0.013347266241908073,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3393727242946625,
"max": 1.0925297737121582,
"mean": 0.8639394640922546,
"std": 0.06387537717819214,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4232022762298584,
"max": 0.41904953122138977,
"mean": 0.000313526950776577,
"std": 0.043511807918548584,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21472877264022827,
"max": 0.1706702560186386,
"mean": -0.029442301020026207,
"std": 0.03188013657927513,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5987806916236877,
"max": 0.5598706007003784,
"mean": -0.00014896712673362345,
"std": 0.05345924198627472,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17873696982860565,
"max": 0.3771279752254486,
"mean": 0.001353989471681416,
"std": 0.037307873368263245,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.39440417289733887,
"max": 0.36891528964042664,
"mean": 3.757418380700983e-05,
"std": 0.028618069365620613,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2904903292655945,
"max": 0.8274624347686768,
"mean": 0.7055505514144897,
"std": 0.06785926967859268,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9264864325523376,
"max": 1.0268279314041138,
"mean": -2.7663820219459012e-05,
"std": 0.04763999581336975,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8792101740837097,
"max": 0.8157498240470886,
"mean": -0.00029962146072648466,
"std": 0.09555412083864212,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.26960939168930054,
"max": 0.24089379608631134,
"mean": -2.2403137336368673e-05,
"std": 0.038951266556978226,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.743011474609375,
"max": 22.851470947265625,
"mean": -0.09188262373209,
"std": 4.07051944732666,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.2278052568435669,
"max": 0.2454863339662552,
"mean": -2.561333167250268e-05,
"std": 0.0386415459215641,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.060211505740880966,
"max": 0.04552706331014633,
"mean": -0.00013798139116261154,
"std": 0.014687996357679367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3381482660770416,
"max": 0.3747510015964508,
"mean": 7.467011528206058e-06,
"std": 0.04082018882036209,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04627379775047302,
"max": 0.19550754129886627,
"mean": 0.00027567092911340296,
"std": 0.01355433464050293,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3735484182834625,
"max": 1.130308985710144,
"mean": 0.8902099132537842,
"std": 0.06400929391384125,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4475434124469757,
"max": 0.5425565838813782,
"mean": 2.4953253159765154e-05,
"std": 0.0455789715051651,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.223903626203537,
"max": 0.08773155510425568,
"mean": -0.0320122167468071,
"std": 0.03775562718510628,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7259138822555542,
"max": 0.6885775923728943,
"mean": 3.529630339471623e-05,
"std": 0.05179176479578018,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17448118329048157,
"max": 0.2181989699602127,
"mean": 3.60202684532851e-05,
"std": 0.03176648169755936,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.33964094519615173,
"max": 0.3732447624206543,
"mean": 4.3327472667442635e-05,
"std": 0.03413660451769829,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31786343455314636,
"max": 1.2872315645217896,
"mean": 0.6015468835830688,
"std": 0.08348662406206131,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2830578088760376,
"max": 0.26022711396217346,
"mean": -2.739398723861086e-06,
"std": 0.03598024696111679,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.2355988472700119,
"max": 0.205682173371315,
"mean": 0.00023985601728782058,
"std": 0.05602918937802315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4354512095451355,
"max": 0.3249225318431854,
"mean": 2.4408442186540924e-05,
"std": 0.034124936908483505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.546271324157715,
"max": 7.313862323760986,
"mean": -0.007370356470346451,
"std": 0.6993649005889893,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3440709412097931,
"max": 0.3629132807254791,
"mean": 0.00010299268615199253,
"std": 0.04783618077635765,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07372982054948807,
"max": 0.060475897043943405,
"mean": 0.0009333858033642173,
"std": 0.014939810149371624,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2562521696090698,
"max": 0.2865331768989563,
"mean": 4.6935901991673745e-06,
"std": 0.04156438633799553,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05538477003574371,
"max": 0.06286550313234329,
"mean": 0.00012986664660274982,
"std": 0.007165286689996719,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49387338757514954,
"max": 1.2207623720169067,
"mean": 1.0135465860366821,
"std": 0.11748857796192169,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939587354660034,
"max": 1.0474854707717896,
"mean": -4.887886461801827e-05,
"std": 0.052416812628507614,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22367814183235168,
"max": 0.17331884801387787,
"mean": -0.027228882536292076,
"std": 0.03631311282515526,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8839902877807617,
"max": 0.9222039580345154,
"mean": -0.00014613418898079544,
"std": 0.0532962903380394,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17102710902690887,
"max": 0.37978917360305786,
"mean": 0.0033693695440888405,
"std": 0.03987928107380867,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7775930762290955,
"max": 0.7230536341667175,
"mean": 1.795422940631397e-05,
"std": 0.04615578427910805,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33868706226348877,
"max": 1.428168535232544,
"mean": 0.948466420173645,
"std": 0.206797257065773,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7458410263061523,
"max": 1.7044554948806763,
"mean": 0.00022709151380695403,
"std": 0.1587017923593521,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.199466586112976,
"max": 1.1009190082550049,
"mean": -0.009544244036078453,
"std": 0.20388931035995483,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4210292100906372,
"max": 0.42779824137687683,
"mean": 6.407736509572715e-05,
"std": 0.04801918938755989,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.747289657592773,
"max": 19.542404174804688,
"mean": -0.24833638966083527,
"std": 4.7769317626953125,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32385140657424927,
"max": 0.4385547339916229,
"mean": -1.1735279258573428e-05,
"std": 0.04616609960794449,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.0338931679725647,
"max": 0.036946121603250504,
"mean": 0.0006420122808776796,
"std": 0.012915823608636856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7037211656570435,
"max": 0.668102502822876,
"mean": 4.292663652449846e-05,
"std": 0.05789082497358322,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07235053181648254,
"max": 0.06769613176584244,
"mean": -0.0001348661899100989,
"std": 0.01290997676551342,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38041582703590393,
"max": 1.3927761316299438,
"mean": 1.06671142578125,
"std": 0.21977396309375763,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164037585258484,
"max": 0.7183761596679688,
"mean": 0.00011247429210925475,
"std": 0.05802652984857559,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21889959275722504,
"max": 0.22502842545509338,
"mean": 0.006201672367751598,
"std": 0.049709536135196686,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6296432018280029,
"max": 0.8894878029823303,
"mean": 1.1972185347985942e-05,
"std": 0.02354392781853676,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5068784356117249,
"max": 0.47380438446998596,
"mean": -0.0030183307826519012,
"std": 0.06925629079341888,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5380737781524658,
"max": 1.1801798343658447,
"mean": 0.7828105092048645,
"std": 0.09876621514558792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2670763432979584,
"max": 0.21297039091587067,
"mean": -0.0002238377055618912,
"std": 0.05400474742054939,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23828226327896118,
"max": 0.014816822484135628,
"mean": -0.043933507055044174,
"std": 0.034287311136722565,
"sparsity": 0.0,
"shape": [
100
]
}
}
}