int0 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
ba9cb2d verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43058744072914124,
"max": 0.29903075098991394,
"mean": -0.0025567002594470978,
"std": 0.04255249723792076,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06321248412132263,
"max": 0.107655830681324,
"mean": 0.0005928671453148127,
"std": 0.03411800414323807,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.4126332402229309,
"max": 0.8362816572189331,
"mean": -0.00021067322813905776,
"std": 0.024107061326503754,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11544923484325409,
"max": 0.3215144872665405,
"mean": -0.0009406265453435481,
"std": 0.01957659050822258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.791715383529663,
"max": 2.870434045791626,
"mean": -0.0003647833364084363,
"std": 0.6153609752655029,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27896371483802795,
"max": 0.3819044828414917,
"mean": 0.0004220041155349463,
"std": 0.04275014251470566,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22224494814872742,
"max": 0.20959755778312683,
"mean": -0.004497884772717953,
"std": 0.040913522243499756,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4279385209083557,
"max": 0.4752762019634247,
"mean": 2.009033551075845e-06,
"std": 0.024508582428097725,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32550832629203796,
"max": 0.1569339483976364,
"mean": -0.046702392399311066,
"std": 0.0515773706138134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4104919135570526,
"max": 0.3544883131980896,
"mean": -0.00012644486560020596,
"std": 0.02360026352107525,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.229718416929245,
"max": 0.26262396574020386,
"mean": -0.02914787270128727,
"std": 0.04934746399521828,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2545970380306244,
"max": 0.8200467824935913,
"mean": 0.5254305601119995,
"std": 0.08080543577671051,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29690292477607727,
"max": 0.26533740758895874,
"mean": -0.00042425302672199905,
"std": 0.0321030355989933,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09272623807191849,
"max": 0.12487658858299255,
"mean": 0.0006494724657386541,
"std": 0.025737110525369644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.29031693935394287,
"max": 0.2813326120376587,
"mean": -7.68666504882276e-05,
"std": 0.03093528188765049,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.899355888366699,
"max": 5.814132213592529,
"mean": -0.00933213159441948,
"std": 1.29543137550354,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42477670311927795,
"max": 0.3437301814556122,
"mean": 9.746497380547225e-05,
"std": 0.029952634125947952,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028919341042637825,
"max": 0.027677638456225395,
"mean": -0.00031004834454506636,
"std": 0.012572667561471462,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4539007246494293,
"max": 0.4487650692462921,
"mean": 2.293557918164879e-05,
"std": 0.023855043575167656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08868313580751419,
"max": 0.09119853377342224,
"mean": 0.0022740147542208433,
"std": 0.019512386992573738,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2666647434234619,
"max": 1.0563400983810425,
"mean": 0.5311195850372314,
"std": 0.10441721975803375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5746223330497742,
"max": 0.6085677742958069,
"mean": -0.0004311846860218793,
"std": 0.038594383746385574,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18177427351474762,
"max": 0.04579279571771622,
"mean": -0.029445737600326538,
"std": 0.04258440434932709,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1666346788406372,
"max": 1.6346005201339722,
"mean": 0.0003186643880326301,
"std": 0.027693353593349457,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16253599524497986,
"max": 0.20575034618377686,
"mean": -0.02111678197979927,
"std": 0.027937985956668854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22444167733192444,
"max": 0.8436422944068909,
"mean": 0.4875181317329407,
"std": 0.07519698888063431,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.25531357526779175,
"max": 0.3059065341949463,
"mean": -9.770956239663064e-06,
"std": 0.03346950560808182,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.0954207256436348,
"max": 0.11047575622797012,
"mean": 5.4158546845428646e-05,
"std": 0.026984980329871178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.2974885404109955,
"max": 0.29604607820510864,
"mean": 5.041498661739752e-05,
"std": 0.03253797069191933,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.164185523986816,
"max": 5.084409236907959,
"mean": -0.014593909494578838,
"std": 1.1573563814163208,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34487831592559814,
"max": 0.34348052740097046,
"mean": 7.885653030825779e-05,
"std": 0.030057402327656746,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03615832328796387,
"max": 0.03314381092786789,
"mean": -0.00014287084923125803,
"std": 0.01301794033497572,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31527891755104065,
"max": 0.3751768469810486,
"mean": -2.1734818801633082e-05,
"std": 0.02405463345348835,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10528924316167831,
"max": 0.12185486406087875,
"mean": -0.0019566768314689398,
"std": 0.028841182589530945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3117589056491852,
"max": 1.1208702325820923,
"mean": 0.6662365198135376,
"std": 0.09775208681821823,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.872468888759613,
"max": 0.6275586485862732,
"mean": 0.0016758753918111324,
"std": 0.047438040375709534,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2710355520248413,
"max": 0.03406016156077385,
"mean": -0.04659765958786011,
"std": 0.04059656709432602,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9201626181602478,
"max": 0.9643434882164001,
"mean": 0.0010215931106358767,
"std": 0.04070163145661354,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14462199807167053,
"max": 0.07486966252326965,
"mean": -0.009085646830499172,
"std": 0.02570141665637493,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23963269591331482,
"max": 0.7123461365699768,
"mean": 0.4472006559371948,
"std": 0.05932367965579033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.2729354500770569,
"max": 0.29745981097221375,
"mean": 8.72666532814037e-06,
"std": 0.03547453135251999,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11902111023664474,
"max": 0.1184910237789154,
"mean": 0.0007516429759562016,
"std": 0.02761562168598175,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.28102290630340576,
"max": 0.27947571873664856,
"mean": -7.658830872969702e-05,
"std": 0.03510264679789543,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.509542465209961,
"max": 2.521538496017456,
"mean": 0.026744995266199112,
"std": 0.5867680311203003,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.2209818959236145,
"max": 0.2715614438056946,
"mean": 2.5364215616718866e-06,
"std": 0.0307310800999403,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03315867856144905,
"max": 0.0312359519302845,
"mean": 0.00011449654994066805,
"std": 0.012396099045872688,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23535357415676117,
"max": 0.23171932995319366,
"mean": 5.724863876821473e-05,
"std": 0.025697464123368263,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13585864007472992,
"max": 0.12803053855895996,
"mean": -0.0054976665414869785,
"std": 0.039962489157915115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3546965718269348,
"max": 1.1723699569702148,
"mean": 0.7105212211608887,
"std": 0.10377959161996841,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6174826622009277,
"max": 0.5556296706199646,
"mean": 0.001160400453954935,
"std": 0.04611344262957573,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18955032527446747,
"max": 0.024929288774728775,
"mean": -0.03484814986586571,
"std": 0.02862328663468361,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.130905032157898,
"max": 0.970402181148529,
"mean": 0.00035809652763418853,
"std": 0.04234178364276886,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5977792143821716,
"max": 0.06286704540252686,
"mean": -0.004878203850239515,
"std": 0.028615841642022133,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3753129839897156,
"max": 0.9404288530349731,
"mean": 0.5924519896507263,
"std": 0.06695062667131424,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3918393850326538,
"max": 0.3694100081920624,
"mean": 7.003510108916089e-05,
"std": 0.03718580678105354,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11892382800579071,
"max": 0.1364460289478302,
"mean": 0.0009139248286373913,
"std": 0.02918536402285099,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6189467310905457,
"max": 0.5086581707000732,
"mean": 1.522459842817625e-05,
"std": 0.036438774317502975,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.18658447265625,
"max": 8.788694381713867,
"mean": -0.10927355289459229,
"std": 1.6988238096237183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27650272846221924,
"max": 0.2397344559431076,
"mean": 5.2208531997166574e-05,
"std": 0.03261270374059677,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.051591187715530396,
"max": 0.039499007165431976,
"mean": 9.101108298636973e-05,
"std": 0.01296647172421217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.2308182418346405,
"max": 0.23492185771465302,
"mean": -2.198125366703607e-05,
"std": 0.0293892789632082,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20422494411468506,
"max": 0.10520327836275101,
"mean": -0.004020952619612217,
"std": 0.032637566328048706,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3395068645477295,
"max": 1.0124397277832031,
"mean": 0.7006875872612,
"std": 0.09675538539886475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5645881295204163,
"max": 0.8335761427879333,
"mean": 0.00041510065784677863,
"std": 0.04229363799095154,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2121758759021759,
"max": 0.0300263874232769,
"mean": -0.032174285501241684,
"std": 0.026499440893530846,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7549118995666504,
"max": 0.7191137671470642,
"mean": -1.6272973880404606e-05,
"std": 0.03683432564139366,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2633835971355438,
"max": 0.10630631446838379,
"mean": -0.00301279011182487,
"std": 0.028871648013591766,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2839854061603546,
"max": 0.695024311542511,
"mean": 0.49937066435813904,
"std": 0.04653334617614746,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.2781727910041809,
"max": 0.23389220237731934,
"mean": -0.00011100011033704504,
"std": 0.0387568399310112,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15358875691890717,
"max": 0.12641564011573792,
"mean": -0.0022295925300568342,
"std": 0.03333538770675659,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41443270444869995,
"max": 0.6594027280807495,
"mean": -1.858997711678967e-05,
"std": 0.03909648209810257,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.237802028656006,
"max": 4.722365379333496,
"mean": -0.020456280559301376,
"std": 1.0076717138290405,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24511729180812836,
"max": 0.20752397179603577,
"mean": 4.432153218658641e-05,
"std": 0.03396220877766609,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03445148468017578,
"max": 0.044871583580970764,
"mean": -1.9065962987951934e-05,
"std": 0.012637496925890446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20115934312343597,
"max": 0.20639759302139282,
"mean": -2.9241522497613914e-05,
"std": 0.031020423397421837,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19977232813835144,
"max": 0.1132478341460228,
"mean": -0.002891883021220565,
"std": 0.03452973812818527,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3667006194591522,
"max": 1.0575865507125854,
"mean": 0.6704831123352051,
"std": 0.06640235334634781,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.39832764863967896,
"max": 0.5020085573196411,
"mean": -3.8792531995568424e-05,
"std": 0.041129473596811295,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12865175306797028,
"max": 0.02696564421057701,
"mean": -0.030531559139490128,
"std": 0.021883869543671608,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44955554604530334,
"max": 0.4331819415092468,
"mean": 7.46890582377091e-05,
"std": 0.034889888018369675,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26744911074638367,
"max": 0.07309805601835251,
"mean": -0.0010887861717492342,
"std": 0.023132896050810814,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28746652603149414,
"max": 0.6852710843086243,
"mean": 0.5245163440704346,
"std": 0.04753531143069267,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.2225414365530014,
"max": 0.2233862727880478,
"mean": 1.5953022739267908e-05,
"std": 0.038948602974414825,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13633988797664642,
"max": 0.10930000245571136,
"mean": 0.00024919791030697525,
"std": 0.029206812381744385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.3749636113643646,
"max": 0.43756094574928284,
"mean": -9.44960629567504e-06,
"std": 0.03928674757480621,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.845799684524536,
"max": 4.999211311340332,
"mean": 0.009741385467350483,
"std": 0.8452029228210449,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22279420495033264,
"max": 0.22023756802082062,
"mean": -3.8509870137204416e-07,
"std": 0.03440963104367256,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04381667822599411,
"max": 0.03586551547050476,
"mean": -0.0002609736402519047,
"std": 0.012077639810740948,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21273735165596008,
"max": 0.18841038644313812,
"mean": -1.714246354822535e-05,
"std": 0.031536102294921875,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18087971210479736,
"max": 0.12077755481004715,
"mean": -0.0023926026187837124,
"std": 0.04127210006117821,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4229143261909485,
"max": 0.941786527633667,
"mean": 0.6626389026641846,
"std": 0.056811243295669556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37079188227653503,
"max": 0.47652140259742737,
"mean": -8.189280197257176e-05,
"std": 0.040888600051403046,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20858491957187653,
"max": 0.027342500165104866,
"mean": -0.03023093193769455,
"std": 0.021366029977798462,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3407646119594574,
"max": 0.7343085408210754,
"mean": 8.227993384934962e-05,
"std": 0.03476560488343239,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2401275634765625,
"max": 0.05064300820231438,
"mean": -0.0011859382502734661,
"std": 0.020460018888115883,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3059234321117401,
"max": 0.6536071300506592,
"mean": 0.5251041054725647,
"std": 0.046117961406707764,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30434539914131165,
"max": 0.21718497574329376,
"mean": 6.997769378358498e-05,
"std": 0.03949679434299469,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.1491607427597046,
"max": 0.1309996247291565,
"mean": 0.00032534098136238754,
"std": 0.030453510582447052,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.25696200132369995,
"max": 0.20183700323104858,
"mean": 3.1303323339670897e-05,
"std": 0.0394880436360836,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3362133502960205,
"max": 2.3758370876312256,
"mean": -0.026241015642881393,
"std": 0.4497620761394501,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.1885133534669876,
"max": 0.21026504039764404,
"mean": 3.72500107914675e-05,
"std": 0.03479313850402832,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03166966885328293,
"max": 0.035711731761693954,
"mean": -0.00019632275507319719,
"std": 0.012291603721678257,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18826794624328613,
"max": 0.17029285430908203,
"mean": -6.840371497673914e-05,
"std": 0.03216983750462532,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13950176537036896,
"max": 0.13710856437683105,
"mean": -0.002513276878744364,
"std": 0.05129357427358627,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.46702930331230164,
"max": 0.9555635452270508,
"mean": 0.6688482761383057,
"std": 0.05276886373758316,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.3244642913341522,
"max": 0.30925843119621277,
"mean": -9.10853486857377e-07,
"std": 0.04094461724162102,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12482384592294693,
"max": 0.02569793164730072,
"mean": -0.03068721666932106,
"std": 0.019822420552372932,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.43951860070228577,
"max": 0.4452158510684967,
"mean": 9.512923134025186e-05,
"std": 0.03511851280927658,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22458022832870483,
"max": 0.051897209137678146,
"mean": -0.0011794487945735455,
"std": 0.018467247486114502,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3391944468021393,
"max": 0.7399035096168518,
"mean": 0.558688759803772,
"std": 0.04139659181237221,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27298545837402344,
"max": 0.2789517045021057,
"mean": 2.041603875113651e-05,
"std": 0.041056908667087555,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13676847517490387,
"max": 0.1398179680109024,
"mean": 0.0004908779519610107,
"std": 0.026629263535141945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49038437008857727,
"max": 0.35562369227409363,
"mean": 8.908439485821873e-05,
"std": 0.04069468006491661,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.297020673751831,
"max": 1.7451350688934326,
"mean": -0.02108073979616165,
"std": 0.5001184940338135,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2181541919708252,
"max": 0.19748014211654663,
"mean": -4.031343632959761e-05,
"std": 0.034232787787914276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.0411330908536911,
"max": 0.03885316848754883,
"mean": -0.00013403715274762362,
"std": 0.012882057577371597,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17773869633674622,
"max": 0.18285222351551056,
"mean": 4.8017449444159865e-05,
"std": 0.03155619651079178,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.18002314865589142,
"max": 0.18396146595478058,
"mean": -0.0022139688953757286,
"std": 0.05483314022421837,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.474223792552948,
"max": 1.025842308998108,
"mean": 0.6452140212059021,
"std": 0.05035461485385895,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2715917229652405,
"max": 0.30928391218185425,
"mean": 0.00011250950046814978,
"std": 0.04068081080913544,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10539427399635315,
"max": 0.026698507368564606,
"mean": -0.02951802872121334,
"std": 0.017934730276465416,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3393958806991577,
"max": 0.3293214440345764,
"mean": 5.262523700366728e-05,
"std": 0.03441222757101059,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18173128366470337,
"max": 0.04261557012796402,
"mean": -0.001059417612850666,
"std": 0.017207711935043335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32517319917678833,
"max": 0.6865989565849304,
"mean": 0.5111718773841858,
"std": 0.03694766014814377,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.2340274453163147,
"max": 0.22541004419326782,
"mean": -3.624596502049826e-05,
"std": 0.039175089448690414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11520740389823914,
"max": 0.1319286823272705,
"mean": 0.00015029977657832205,
"std": 0.029165174812078476,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3522850573062897,
"max": 0.28482842445373535,
"mean": 6.6099587456847075e-06,
"std": 0.03924406319856644,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.132234573364258,
"max": 3.5437166690826416,
"mean": -0.011590443551540375,
"std": 0.6826013326644897,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21073928475379944,
"max": 0.20945559442043304,
"mean": 3.4624928957782686e-05,
"std": 0.03448405861854553,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.035892292857170105,
"max": 0.0479779876768589,
"mean": 0.0007904525264166296,
"std": 0.012872384861111641,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21030081808567047,
"max": 0.19305069744586945,
"mean": -9.318873708252795e-07,
"std": 0.03169514983892441,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18656854331493378,
"max": 0.17726241052150726,
"mean": -0.002840438624843955,
"std": 0.0586128756403923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4746079444885254,
"max": 1.041317105293274,
"mean": 0.6513123512268066,
"std": 0.04965612292289734,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.24824990332126617,
"max": 0.32916077971458435,
"mean": 0.0001809034583857283,
"std": 0.04056909307837486,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.1252717822790146,
"max": 0.024853328242897987,
"mean": -0.03049679473042488,
"std": 0.01761467382311821,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4204847514629364,
"max": 0.4814334511756897,
"mean": 1.0858502719202079e-06,
"std": 0.03539634868502617,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.1512894481420517,
"max": 0.0435330905020237,
"mean": 4.2967651097569615e-05,
"std": 0.014878639951348305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31564587354660034,
"max": 0.6816184520721436,
"mean": 0.5528937578201294,
"std": 0.04068783298134804,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20636627078056335,
"max": 0.2197655737400055,
"mean": 3.1909676181385294e-05,
"std": 0.038298994302749634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13777659833431244,
"max": 0.11261031776666641,
"mean": 2.2643122065346688e-05,
"std": 0.025812044739723206,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40279680490493774,
"max": 0.3708725571632385,
"mean": 2.5475083020864986e-05,
"std": 0.03817913681268692,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.770826816558838,
"max": 2.8686459064483643,
"mean": 0.001154756173491478,
"std": 0.5168185234069824,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20366711914539337,
"max": 0.1976872831583023,
"mean": 2.9746484869974665e-05,
"std": 0.03429698571562767,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050587497651576996,
"max": 0.039878759533166885,
"mean": -0.00042467116145417094,
"std": 0.013416356407105923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19594806432724,
"max": 0.20180270075798035,
"mean": -1.2511954992078245e-05,
"std": 0.031805265694856644,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.1929892897605896,
"max": 0.19512949883937836,
"mean": -0.002963980659842491,
"std": 0.06252874433994293,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3488827645778656,
"max": 1.0837209224700928,
"mean": 0.6670882701873779,
"std": 0.05524449050426483,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22563330829143524,
"max": 0.25133612751960754,
"mean": 0.00035861917422153056,
"std": 0.040758710354566574,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09100860357284546,
"max": 0.04368036612868309,
"mean": -0.03007863275706768,
"std": 0.01761433854699135,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35325002670288086,
"max": 0.3038857877254486,
"mean": -4.542069655144587e-05,
"std": 0.037121765315532684,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16173334419727325,
"max": 0.06341976672410965,
"mean": -7.59128452045843e-05,
"std": 0.019423963502049446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.348746657371521,
"max": 0.7219499945640564,
"mean": 0.5423322916030884,
"std": 0.03906194120645523,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21932680904865265,
"max": 0.22335435450077057,
"mean": -1.1452927537902724e-05,
"std": 0.03923005238175392,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11840008199214935,
"max": 0.1704910695552826,
"mean": 0.00028676993679255247,
"std": 0.025109266862273216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24656711518764496,
"max": 0.30068346858024597,
"mean": -3.68916334991809e-05,
"std": 0.03892939165234566,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.504953145980835,
"max": 3.7143990993499756,
"mean": 0.015847017988562584,
"std": 0.7823704481124878,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21910522878170013,
"max": 0.23737633228302002,
"mean": -1.3034959920332767e-05,
"std": 0.036302801221609116,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04721483215689659,
"max": 0.051370855420827866,
"mean": 0.00048040057299658656,
"std": 0.013522167690098286,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.2142011672258377,
"max": 0.21717870235443115,
"mean": 5.644252087222412e-05,
"std": 0.03361529856920242,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21134759485721588,
"max": 0.23112934827804565,
"mean": -0.005099965259432793,
"std": 0.061861325055360794,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.361937016248703,
"max": 1.1009857654571533,
"mean": 0.6992422342300415,
"std": 0.053594909608364105,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.2350708544254303,
"max": 0.24471336603164673,
"mean": 0.00046341665438376367,
"std": 0.041268061846494675,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.0980960875749588,
"max": 0.06807035952806473,
"mean": -0.03142966330051422,
"std": 0.018127702176570892,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30174583196640015,
"max": 0.3516803979873657,
"mean": -8.28510383144021e-05,
"std": 0.04027377441525459,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.1523003727197647,
"max": 0.1496732383966446,
"mean": 0.00026386568788439035,
"std": 0.023037536069750786,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9992449879646301,
"max": 1.001513123512268,
"mean": 1.0000585317611694,
"std": 0.0006324834539555013,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031258270144462585,
"max": 0.031254518777132034,
"mean": -1.929036807268858e-05,
"std": 0.018040649592876434,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.03122791275382042,
"max": 0.030987516045570374,
"mean": -0.0010841463226824999,
"std": 0.01795026659965515,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.03125518560409546,
"max": 0.0312589630484581,
"mean": 3.5481098166201264e-06,
"std": 0.018041057512164116,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031153831630945206,
"max": 0.03117419220507145,
"mean": 0.00033391290344297886,
"std": 0.018062464892864227,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006552772247232497,
"max": 0.0007129037985578179,
"mean": 5.131376383360475e-06,
"std": 0.0001946619595400989,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.997419536113739,
"max": 1.0028407573699951,
"mean": 0.9999656081199646,
"std": 0.000851841235999018,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03356073051691055,
"max": 0.03384723141789436,
"mean": -5.6891162785177585e-06,
"std": 0.018047483637928963,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03327289596199989,
"max": 0.03337877616286278,
"mean": -0.00020134463557042181,
"std": 0.017954064533114433,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.001495416508987546,
"max": 0.0016743302112445235,
"mean": 2.175480403820984e-06,
"std": 0.00029829132836312056,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005666155484504998,
"max": 0.0007540585356764495,
"mean": 8.17788895801641e-06,
"std": 0.00017612945521250367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3832930624485016,
"max": 0.7191212773323059,
"mean": 0.5806662440299988,
"std": 0.03885548189282417,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.239033043384552,
"max": 0.19648200273513794,
"mean": 2.5991641450673342e-05,
"std": 0.03746527060866356,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11883819848299026,
"max": 0.1667412370443344,
"mean": 0.0009821474086493254,
"std": 0.02755241096019745,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.24662744998931885,
"max": 0.4999285340309143,
"mean": -5.0414026190992445e-05,
"std": 0.037622544914484024,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.941795825958252,
"max": 3.768937587738037,
"mean": -0.0035722628235816956,
"std": 0.681327760219574,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22736430168151855,
"max": 0.25185492634773254,
"mean": -1.1772199286497198e-05,
"std": 0.037433888763189316,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07156982272863388,
"max": 0.08060310035943985,
"mean": -0.0005125089664943516,
"std": 0.01565583609044552,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22800227999687195,
"max": 0.25769373774528503,
"mean": -2.863763802452013e-05,
"std": 0.035420775413513184,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20050473511219025,
"max": 0.2148960828781128,
"mean": -0.005524474661797285,
"std": 0.06832842528820038,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.405087411403656,
"max": 1.1892733573913574,
"mean": 0.7378814816474915,
"std": 0.05523177236318588,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2209046483039856,
"max": 0.24561487138271332,
"mean": 0.000521098030731082,
"std": 0.041335128247737885,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.1032090112566948,
"max": 0.02416798658668995,
"mean": -0.032665450125932693,
"std": 0.018891815096139908,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4496724605560303,
"max": 0.4224262237548828,
"mean": -0.0004358820151537657,
"std": 0.04689519852399826,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2515088617801666,
"max": 0.47011902928352356,
"mean": 0.003207466099411249,
"std": 0.044524550437927246,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3168761134147644,
"max": 0.3331414461135864,
"mean": -2.506819146219641e-05,
"std": 0.02128741703927517,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3245299160480499,
"max": 0.6855776906013489,
"mean": 0.5709930658340454,
"std": 0.04470643773674965,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.1645486205816269,
"max": 0.1745065301656723,
"mean": -4.8789879656396806e-05,
"std": 0.03318168222904205,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18692335486412048,
"max": 0.14329002797603607,
"mean": 3.758035018108785e-05,
"std": 0.029700448736548424,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3810470402240753,
"max": 0.24586895108222961,
"mean": -9.737135769682936e-06,
"std": 0.03276293724775314,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6554298400878906,
"max": 3.2897167205810547,
"mean": -0.014251163229346275,
"std": 0.9850608110427856,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23475222289562225,
"max": 0.2473384439945221,
"mean": -1.814275310607627e-05,
"std": 0.041697416454553604,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.0725652277469635,
"max": 0.15448249876499176,
"mean": 0.0006658083875663579,
"std": 0.02517012506723404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2663499712944031,
"max": 0.2480984330177307,
"mean": -1.5296925994334742e-05,
"std": 0.04013863205909729,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18960432708263397,
"max": 0.194618359208107,
"mean": -0.0012379353865981102,
"std": 0.06668508052825928,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32916781306266785,
"max": 0.9996783137321472,
"mean": 0.7191422581672668,
"std": 0.0523388646543026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23172040283679962,
"max": 0.2451343685388565,
"mean": 0.00018265256949234754,
"std": 0.04089942201972008,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11451541632413864,
"max": 0.01910208724439144,
"mean": -0.04247751086950302,
"std": 0.0188636165112257,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.38971978425979614,
"max": 0.40751317143440247,
"mean": -2.1620868210447952e-05,
"std": 0.04853251948952675,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6930332779884338,
"max": 0.4125932455062866,
"mean": 0.0008482532575726509,
"std": 0.06028350815176964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0015386008890345693,
"max": 1.0007996559143066,
"mean": 0.00048813552712090313,
"std": 0.022089246660470963,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.9992700219154358,
"max": 1.0015240907669067,
"mean": 1.0000568628311157,
"std": 0.000619773636572063,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031252991408109665,
"max": 0.031256891787052155,
"mean": -2.1020092390244827e-05,
"std": 0.01803199015557766,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03121490404009819,
"max": 0.03123173676431179,
"mean": -0.0006769870524294674,
"std": 0.01782653108239174,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.0312560498714447,
"max": 0.03126147389411926,
"mean": -8.831357263261452e-06,
"std": 0.01803101785480976,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031231652945280075,
"max": 0.031244346871972084,
"mean": -0.0007297407719306648,
"std": 0.01794145628809929,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0005350728752091527,
"max": 0.0004281355068087578,
"mean": -3.930799721274525e-06,
"std": 0.00015574153803754598,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9972792267799377,
"max": 1.0023835897445679,
"mean": 0.9995018243789673,
"std": 0.0008350047282874584,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03338531777262688,
"max": 0.03282884135842323,
"mean": -2.971738467749674e-06,
"std": 0.018026772886514664,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.03250397369265556,
"max": 0.031224608421325684,
"mean": -0.0005561817670240998,
"std": 0.01803283393383026,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.001761053572408855,
"max": 0.0016201753169298172,
"mean": -9.977067065847223e-07,
"std": 0.00029509843443520367,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0005179685540497303,
"max": 0.00046010586083866656,
"mean": -3.1889690035313834e-06,
"std": 0.00014008936705067754,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23426799476146698,
"max": 0.2724316120147705,
"mean": 6.618206498387735e-06,
"std": 0.01881008967757225,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32140958309173584,
"max": 0.6938180923461914,
"mean": 0.58160400390625,
"std": 0.045936692506074905,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18184486031532288,
"max": 0.19783763587474823,
"mean": -1.1537180398590863e-05,
"std": 0.03318366780877113,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16044476628303528,
"max": 0.12933249771595,
"mean": -0.001071967650204897,
"std": 0.03413407504558563,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33228737115859985,
"max": 0.31113728880882263,
"mean": -1.0175894203712232e-05,
"std": 0.03223416581749916,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.80244779586792,
"max": 8.761518478393555,
"mean": 0.093451589345932,
"std": 1.619434118270874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23388099670410156,
"max": 0.2418091893196106,
"mean": 4.1715411498444155e-05,
"std": 0.04085543006658554,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07592413574457169,
"max": 0.06573085486888885,
"mean": 0.00048532572691328824,
"std": 0.019415952265262604,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.2459113746881485,
"max": 0.23399382829666138,
"mean": -3.2584175642114133e-06,
"std": 0.039430178701877594,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.1629519760608673,
"max": 0.16087952256202698,
"mean": 0.0016248535830527544,
"std": 0.06528551876544952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5571001172065735,
"max": 0.9435561299324036,
"mean": 0.712803840637207,
"std": 0.040119532495737076,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.2279409021139145,
"max": 0.25474709272384644,
"mean": -4.549993900582194e-05,
"std": 0.040573619306087494,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13481706380844116,
"max": 0.02219359762966633,
"mean": -0.041350673884153366,
"std": 0.018385522067546844,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.42158395051956177,
"max": 0.3924521505832672,
"mean": -4.16895818489138e-06,
"std": 0.047782838344573975,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.607164204120636,
"max": 0.6512984037399292,
"mean": 0.0015855339588597417,
"std": 0.056834105402231216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25181475281715393,
"max": 0.32078737020492554,
"mean": -6.139540346339345e-06,
"std": 0.019613103941082954,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3595266342163086,
"max": 0.6821960806846619,
"mean": 0.5706722140312195,
"std": 0.042985353618860245,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.2202295958995819,
"max": 0.177076518535614,
"mean": -3.443878813413903e-05,
"std": 0.03429801017045975,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16317804157733917,
"max": 0.23287786543369293,
"mean": 0.00035837513860315084,
"std": 0.03280922770500183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2639525532722473,
"max": 0.23980671167373657,
"mean": -5.297175084706396e-05,
"std": 0.03389657661318779,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.854193210601807,
"max": 5.090420722961426,
"mean": 0.043878111988306046,
"std": 1.2290726900100708,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24640515446662903,
"max": 0.250241219997406,
"mean": 7.21166143193841e-05,
"std": 0.043985553085803986,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06247914582490921,
"max": 0.054487086832523346,
"mean": 0.0006464287871494889,
"std": 0.017190182581543922,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2863953709602356,
"max": 0.27215418219566345,
"mean": -5.014354974264279e-05,
"std": 0.0429837629199028,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16105736792087555,
"max": 0.17032958567142487,
"mean": -0.0028887835796922445,
"std": 0.05930224433541298,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5198467373847961,
"max": 0.9329147338867188,
"mean": 0.7133820652961731,
"std": 0.03842068091034889,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23785468935966492,
"max": 0.2487422525882721,
"mean": 0.00046461093006655574,
"std": 0.04045235738158226,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14500072598457336,
"max": 0.04102769121527672,
"mean": -0.039694253355264664,
"std": 0.020542506128549576,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.532442569732666,
"max": 0.5823614597320557,
"mean": 6.013309757690877e-06,
"std": 0.04885788634419441,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5189021229743958,
"max": 0.4934021234512329,
"mean": 0.0023652694653719664,
"std": 0.05344180017709732,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2737113833427429,
"max": 0.3155929148197174,
"mean": 1.988332769542467e-06,
"std": 0.020049693062901497,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3658766746520996,
"max": 0.7116788029670715,
"mean": 0.5931248664855957,
"std": 0.04595986381173134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21088893711566925,
"max": 0.19901061058044434,
"mean": 3.061449388042092e-05,
"std": 0.0348670557141304,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18707768619060516,
"max": 0.20344795286655426,
"mean": 0.0009536991128697991,
"std": 0.03149910271167755,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2897132933139801,
"max": 0.3398728668689728,
"mean": -4.695481766248122e-05,
"std": 0.034587565809488297,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8768022060394287,
"max": 3.386897563934326,
"mean": 0.014455738477408886,
"std": 0.8582935929298401,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22446562349796295,
"max": 0.24974551796913147,
"mean": -3.865096914523747e-06,
"std": 0.042228855192661285,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.055283673107624054,
"max": 0.046579472720623016,
"mean": -2.0229621441103518e-05,
"std": 0.015845011919736862,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2932588756084442,
"max": 0.29019662737846375,
"mean": -7.67192614148371e-06,
"std": 0.04194393754005432,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12487518787384033,
"max": 0.2589555084705353,
"mean": -0.0032450095750391483,
"std": 0.053175244480371475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.45627039670944214,
"max": 0.8444806933403015,
"mean": 0.7054478526115417,
"std": 0.03522774204611778,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.512130856513977,
"max": 0.34817978739738464,
"mean": 0.00034297071397304535,
"std": 0.040197573602199554,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18561507761478424,
"max": 0.039553456008434296,
"mean": -0.039388205856084824,
"std": 0.02135956473648548,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5439714193344116,
"max": 0.5556594729423523,
"mean": -7.099103095242754e-05,
"std": 0.050732966512441635,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5116639137268066,
"max": 0.6642246842384338,
"mean": 0.002442360855638981,
"std": 0.04952433332800865,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33249908685684204,
"max": 0.2653781771659851,
"mean": 3.2569464565312956e-06,
"std": 0.019386788830161095,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3219698965549469,
"max": 0.766376256942749,
"mean": 0.651033878326416,
"std": 0.04532676190137863,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2498074471950531,
"max": 0.21987499296665192,
"mean": -1.9507724573486485e-06,
"std": 0.036501552909612656,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3268783390522003,
"max": 0.2866748869419098,
"mean": -0.0006870508659631014,
"std": 0.03855406492948532,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3101723790168762,
"max": 0.37016358971595764,
"mean": 6.504941848106682e-05,
"std": 0.03624220937490463,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.7166595458984375,
"max": 5.806900978088379,
"mean": 0.03795350342988968,
"std": 1.4129759073257446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22155693173408508,
"max": 0.2057628631591797,
"mean": -7.524936518166214e-05,
"std": 0.042484089732170105,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07764487713575363,
"max": 0.051462698727846146,
"mean": -0.000925063737668097,
"std": 0.0164109468460083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33050650358200073,
"max": 0.329324871301651,
"mean": -4.5611386667587794e-06,
"std": 0.042790405452251434,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2847575545310974,
"max": 0.11197607964277267,
"mean": -0.0012040773872286081,
"std": 0.04701252654194832,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48601120710372925,
"max": 0.8868346214294434,
"mean": 0.7373513579368591,
"std": 0.038241803646087646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3624517619609833,
"max": 0.27458682656288147,
"mean": 5.118873013998382e-05,
"std": 0.040643129497766495,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24757687747478485,
"max": 0.046393755823373795,
"mean": -0.039262838661670685,
"std": 0.023290209472179413,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.626139223575592,
"max": 0.5965114235877991,
"mean": -6.056673373677768e-05,
"std": 0.0531148836016655,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7093748450279236,
"max": 0.2657814621925354,
"mean": 0.0009187416289933026,
"std": 0.05122179910540581,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433896005153656,
"max": 0.3037145733833313,
"mean": 3.0547948881576303e-07,
"std": 0.019135164096951485,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34973248839378357,
"max": 0.7829060554504395,
"mean": 0.6387954354286194,
"std": 0.049250222742557526,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20535039901733398,
"max": 0.20685911178588867,
"mean": -5.973261431790888e-05,
"std": 0.03769532963633537,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25850412249565125,
"max": 0.2679128050804138,
"mean": -0.00040441699093207717,
"std": 0.044591374695301056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.354056179523468,
"max": 0.3223519027233124,
"mean": -6.86804014549125e-06,
"std": 0.03720388934016228,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.260861873626709,
"max": 4.203889846801758,
"mean": -0.02641155757009983,
"std": 1.0066218376159668,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23860004544258118,
"max": 0.24336647987365723,
"mean": -2.503740142856259e-05,
"std": 0.043208908289670944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06237001344561577,
"max": 0.05677289888262749,
"mean": 0.0003429377684369683,
"std": 0.014151404611766338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43683916330337524,
"max": 0.37347522377967834,
"mean": 1.453105596738169e-05,
"std": 0.04412021487951279,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.096480593085289,
"max": 0.17590999603271484,
"mean": -0.0006604294758290052,
"std": 0.03515587002038956,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4216778874397278,
"max": 1.0693583488464355,
"mean": 0.7482997179031372,
"std": 0.04205985367298126,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2665577530860901,
"max": 0.2968434989452362,
"mean": -7.962346717249602e-05,
"std": 0.040803126990795135,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18574897944927216,
"max": 0.04386778548359871,
"mean": -0.036819178611040115,
"std": 0.02561137080192566,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.45699048042297363,
"max": 0.4864794611930847,
"mean": 4.341273597674444e-05,
"std": 0.05420761927962303,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.28645777702331543,
"max": 0.5512458086013794,
"mean": -0.0008799894712865353,
"std": 0.04782594367861748,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.29278504848480225,
"max": 0.32276028394699097,
"mean": 6.534221029141918e-06,
"std": 0.019969386979937553,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29091978073120117,
"max": 0.760124921798706,
"mean": 0.6508240699768066,
"std": 0.05213485658168793,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24355527758598328,
"max": 0.2617471516132355,
"mean": -6.045864211046137e-06,
"std": 0.03961271047592163,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2675487995147705,
"max": 0.19986717402935028,
"mean": -0.0008803302189335227,
"std": 0.051758527755737305,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2720382511615753,
"max": 0.25365304946899414,
"mean": 3.97135409002658e-06,
"std": 0.03870992362499237,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.963478088378906,
"max": 15.945467948913574,
"mean": 0.03322439640760422,
"std": 1.988944411277771,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20726989209651947,
"max": 0.2258823961019516,
"mean": -7.221873966045678e-05,
"std": 0.04055318236351013,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06934336572885513,
"max": 0.06329023838043213,
"mean": 0.00015188338875304908,
"std": 0.014744000509381294,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46502813696861267,
"max": 0.3207668662071228,
"mean": 1.9557133782655e-05,
"std": 0.04058815911412239,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06403840333223343,
"max": 0.11518330872058868,
"mean": 0.001191072165966034,
"std": 0.02470429427921772,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3746289610862732,
"max": 0.9322671294212341,
"mean": 0.7508296370506287,
"std": 0.040182456374168396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2793700397014618,
"max": 0.27312716841697693,
"mean": -0.00016854800924193114,
"std": 0.040993720293045044,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19878964126110077,
"max": 0.050874363631010056,
"mean": -0.03202495723962784,
"std": 0.02511216513812542,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6572921276092529,
"max": 0.5353701114654541,
"mean": -4.860567787545733e-05,
"std": 0.052844274789094925,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19308353960514069,
"max": 0.5820099115371704,
"mean": -0.0005148603231646121,
"std": 0.04106666147708893,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41772764921188354,
"max": 0.3719545602798462,
"mean": 6.02346335654147e-06,
"std": 0.021620826795697212,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21424666047096252,
"max": 0.7470943331718445,
"mean": 0.6495506763458252,
"std": 0.05437405779957771,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.2095523476600647,
"max": 0.19568544626235962,
"mean": 4.010393604403362e-05,
"std": 0.03946491330862045,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32928818464279175,
"max": 0.2594093382358551,
"mean": -0.0032241325825452805,
"std": 0.05625630542635918,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2056845873594284,
"max": 0.254710853099823,
"mean": 5.4258445743471384e-05,
"std": 0.038567040115594864,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.242719650268555,
"max": 6.931571006774902,
"mean": 0.04833323508501053,
"std": 1.384921908378601,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20961354672908783,
"max": 0.2300715446472168,
"mean": -5.3330231821746565e-06,
"std": 0.04131212830543518,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04391402378678322,
"max": 0.03599291667342186,
"mean": 3.6780984373763204e-06,
"std": 0.012800832279026508,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39794921875,
"max": 0.34475040435791016,
"mean": -5.557174881687388e-05,
"std": 0.0423884317278862,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055058449506759644,
"max": 0.06288675218820572,
"mean": 0.0003690638695843518,
"std": 0.018671618774533272,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3500124216079712,
"max": 1.0451101064682007,
"mean": 0.789310097694397,
"std": 0.048743680119514465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.33340734243392944,
"max": 0.3858667314052582,
"mean": -0.00016963679809123278,
"std": 0.04147941246628761,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15730711817741394,
"max": 0.05913476645946503,
"mean": -0.031834498047828674,
"std": 0.025142161175608635,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6963925361633301,
"max": 0.46865832805633545,
"mean": -9.133096318691969e-05,
"std": 0.05179010331630707,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.248288094997406,
"max": 0.3285192847251892,
"mean": -0.0002480646944604814,
"std": 0.04143183305859566,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2872416079044342,
"max": 0.35022279620170593,
"mean": -2.109378556269803e-06,
"std": 0.024238325655460358,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19658105075359344,
"max": 0.7791422605514526,
"mean": 0.6702942848205566,
"std": 0.0586935319006443,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22860872745513916,
"max": 0.2311849147081375,
"mean": -1.9817682186840102e-05,
"std": 0.04044090211391449,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.21965830028057098,
"max": 0.2406904250383377,
"mean": 0.0007772702374495566,
"std": 0.05579812079668045,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21554625034332275,
"max": 0.2266112118959427,
"mean": -7.155907223932445e-05,
"std": 0.03937710076570511,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.904163360595703,
"max": 9.067035675048828,
"mean": -0.001250317320227623,
"std": 1.848069429397583,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.26928046345710754,
"max": 0.2589084208011627,
"mean": 4.358497244538739e-05,
"std": 0.03840699419379234,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05760843679308891,
"max": 0.057633914053440094,
"mean": 0.0003498811274766922,
"std": 0.014721624553203583,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.265085905790329,
"max": 0.2886793613433838,
"mean": -6.175917224027216e-05,
"std": 0.03907330706715584,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.043753523379564285,
"max": 0.03726416453719139,
"mean": -8.701729530002922e-05,
"std": 0.013365592807531357,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.3394947946071625,
"max": 1.092633843421936,
"mean": 0.8636797666549683,
"std": 0.06384899467229843,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42328590154647827,
"max": 0.4191039204597473,
"mean": 0.0003126378287561238,
"std": 0.043501876294612885,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.2147369235754013,
"max": 0.17059248685836792,
"mean": -0.029485618695616722,
"std": 0.03195330873131752,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5996397733688354,
"max": 0.5595637559890747,
"mean": -0.00015250420256052166,
"std": 0.05344444885849953,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17845340073108673,
"max": 0.37662389874458313,
"mean": 0.0013645882718265057,
"std": 0.037309858947992325,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3942789137363434,
"max": 0.36899739503860474,
"mean": 3.645062679424882e-05,
"std": 0.028621336445212364,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2902868390083313,
"max": 0.8265326619148254,
"mean": 0.7055679559707642,
"std": 0.0678958147764206,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.926041305065155,
"max": 1.026432991027832,
"mean": -2.5475666916463524e-05,
"std": 0.0476241335272789,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.87814861536026,
"max": 0.8150070905685425,
"mean": -0.00031320619746111333,
"std": 0.09553563594818115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2693868577480316,
"max": 0.24089287221431732,
"mean": -2.29374309128616e-05,
"std": 0.03895637392997742,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.73939323425293,
"max": 22.84785270690918,
"mean": -0.0918712168931961,
"std": 4.0697784423828125,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22775250673294067,
"max": 0.24510256946086884,
"mean": -2.5825131160672754e-05,
"std": 0.03863884136080742,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06045493483543396,
"max": 0.04607832431793213,
"mean": -0.00014694462879560888,
"std": 0.01469829585403204,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33846479654312134,
"max": 0.37447792291641235,
"mean": 7.293592716450803e-06,
"std": 0.04081470146775246,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04649795591831207,
"max": 0.19573213160037994,
"mean": 0.00027208085521124303,
"std": 0.013573010452091694,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37458330392837524,
"max": 1.1300410032272339,
"mean": 0.8900002241134644,
"std": 0.06398438662290573,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4478272497653961,
"max": 0.5424814224243164,
"mean": 2.45622759393882e-05,
"std": 0.045566376298666,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22404029965400696,
"max": 0.08835332095623016,
"mean": -0.032017190009355545,
"std": 0.03776315227150917,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7251995801925659,
"max": 0.6892821788787842,
"mean": 3.438512794673443e-05,
"std": 0.05177679285407066,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.1745474934577942,
"max": 0.2185421884059906,
"mean": 4.038875340484083e-05,
"std": 0.03178102895617485,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3403666019439697,
"max": 0.3743104040622711,
"mean": 4.2970114009222016e-05,
"std": 0.03414527699351311,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.31756705045700073,
"max": 1.2868698835372925,
"mean": 0.6014533042907715,
"std": 0.08345934003591537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.28337857127189636,
"max": 0.26026472449302673,
"mean": -3.1064557788340608e-06,
"std": 0.03598480299115181,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23555569350719452,
"max": 0.2053573727607727,
"mean": 0.0002324726083315909,
"std": 0.05600997060537338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4354943335056305,
"max": 0.3252315819263458,
"mean": 2.4552073227823712e-05,
"std": 0.03413620963692665,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.544710159301758,
"max": 7.31260871887207,
"mean": -0.007366638630628586,
"std": 0.6992178559303284,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34383082389831543,
"max": 0.3635445833206177,
"mean": 0.00010339185246266425,
"std": 0.04782695323228836,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07375096529722214,
"max": 0.06034737080335617,
"mean": 0.000933139817789197,
"std": 0.014950517565011978,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2554619610309601,
"max": 0.28651097416877747,
"mean": 4.460267518879846e-06,
"std": 0.04155408963561058,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.055337581783533096,
"max": 0.06284268200397491,
"mean": 0.00014179576828610152,
"std": 0.007177725899964571,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4937240481376648,
"max": 1.2209070920944214,
"mean": 1.01340913772583,
"std": 0.11743401736021042,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0935479402542114,
"max": 1.0468977689743042,
"mean": -4.9845290050143376e-05,
"std": 0.05240994319319725,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22365206480026245,
"max": 0.17271095514297485,
"mean": -0.027249177917838097,
"std": 0.03635435923933983,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8846310973167419,
"max": 0.9225372672080994,
"mean": -0.00014597778499592096,
"std": 0.053280774503946304,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17102883756160736,
"max": 0.3799268901348114,
"mean": 0.0033686391543596983,
"std": 0.039900682866573334,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7772161960601807,
"max": 0.7236161828041077,
"mean": 1.9240971596445888e-05,
"std": 0.04616595432162285,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33854806423187256,
"max": 1.4277222156524658,
"mean": 0.9483012557029724,
"std": 0.20673148334026337,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7455986738204956,
"max": 1.7045377492904663,
"mean": 0.00022702554997522384,
"std": 0.15868352353572845,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.199636459350586,
"max": 1.0996308326721191,
"mean": -0.009536425583064556,
"std": 0.20382796227931976,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4213047921657562,
"max": 0.4262976348400116,
"mean": 6.459288124460727e-05,
"std": 0.04801792651414871,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.743492126464844,
"max": 19.538597106933594,
"mean": -0.24829509854316711,
"std": 4.776083946228027,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3239092528820038,
"max": 0.43836328387260437,
"mean": -1.204050931846723e-05,
"std": 0.046160612255334854,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.0340789370238781,
"max": 0.03713114559650421,
"mean": 0.0006417044205591083,
"std": 0.012921737506985664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7034957408905029,
"max": 0.664257287979126,
"mean": 4.352344694780186e-05,
"std": 0.05788278207182884,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07222186028957367,
"max": 0.06749024242162704,
"mean": -0.00013264152221381664,
"std": 0.012920759618282318,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38012510538101196,
"max": 1.3909755945205688,
"mean": 1.0665355920791626,
"std": 0.21970459818840027,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164048314094543,
"max": 0.7170195579528809,
"mean": 0.00011136491957586259,
"std": 0.05802035331726074,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21974356472492218,
"max": 0.22506725788116455,
"mean": 0.006242978852242231,
"std": 0.04973088204860687,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6296619176864624,
"max": 0.8891851902008057,
"mean": 1.1489293683553115e-05,
"std": 0.023526353761553764,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5068330764770508,
"max": 0.4739985764026642,
"mean": -0.0030159270390868187,
"std": 0.06930534541606903,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377116799354553,
"max": 1.180783748626709,
"mean": 0.7827296257019043,
"std": 0.09886873513460159,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2669491767883301,
"max": 0.21265925467014313,
"mean": -0.00022343886666931212,
"std": 0.05399514362215996,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23786094784736633,
"max": 0.014840648509562016,
"mean": -0.04396260902285576,
"std": 0.034334905445575714,
"sparsity": 0.0,
"shape": [
100
]
}
}
}