zombieot2 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
6c133be verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43014463782310486,
"max": 0.2980782687664032,
"mean": -0.002543725073337555,
"std": 0.04256265610456467,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.0628998726606369,
"max": 0.1072736531496048,
"mean": 0.0006290247547440231,
"std": 0.034041259437799454,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41270536184310913,
"max": 0.8369129300117493,
"mean": -0.00020170127390883863,
"std": 0.024111710488796234,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11501855403184891,
"max": 0.3208469748497009,
"mean": -0.0009418133413419127,
"std": 0.019536493346095085,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7886247634887695,
"max": 2.8676700592041016,
"mean": -0.0003673351602628827,
"std": 0.6154847145080566,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27889013290405273,
"max": 0.38151732087135315,
"mean": 0.0004236791573930532,
"std": 0.04274853691458702,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.2219879925251007,
"max": 0.2091645449399948,
"mean": -0.004480332136154175,
"std": 0.040872007608413696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42831405997276306,
"max": 0.47610175609588623,
"mean": 3.7659003737644525e-06,
"std": 0.024510981515049934,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.3244315981864929,
"max": 0.15647757053375244,
"mean": -0.046661682426929474,
"std": 0.05150889977812767,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4104415476322174,
"max": 0.3546721041202545,
"mean": -0.00013054230657871813,
"std": 0.02360478602349758,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22924789786338806,
"max": 0.2620227038860321,
"mean": -0.029105938971042633,
"std": 0.04928705468773842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546031177043915,
"max": 0.8185229301452637,
"mean": 0.5252923965454102,
"std": 0.08049347996711731,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.296941339969635,
"max": 0.2655627429485321,
"mean": -0.0004258690751157701,
"std": 0.03210259974002838,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09266690164804459,
"max": 0.12469176203012466,
"mean": 0.0006477286806330085,
"std": 0.025720255449414253,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2905982434749603,
"max": 0.28104421496391296,
"mean": -7.510318391723558e-05,
"std": 0.03093179315328598,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.890929698944092,
"max": 5.805842876434326,
"mean": -0.009318170137703419,
"std": 1.2943130731582642,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42498156428337097,
"max": 0.3436700105667114,
"mean": 9.804974979488179e-05,
"std": 0.029953550547361374,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.029002565890550613,
"max": 0.027599314227700233,
"mean": -0.0003237572673242539,
"std": 0.01257046777755022,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45393431186676025,
"max": 0.44807320833206177,
"mean": 2.389570181549061e-05,
"std": 0.023853935301303864,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.0885927751660347,
"max": 0.09089276939630508,
"mean": 0.0022863608319312334,
"std": 0.019503755494952202,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2667909264564514,
"max": 1.0541586875915527,
"mean": 0.5309650301933289,
"std": 0.10402658581733704,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5743634104728699,
"max": 0.6081749796867371,
"mean": -0.0004296167753636837,
"std": 0.03860084339976311,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18247899413108826,
"max": 0.04562002047896385,
"mean": -0.029428046196699142,
"std": 0.04256246238946915,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1666945219039917,
"max": 1.633580207824707,
"mean": 0.00032344614737667143,
"std": 0.027696726843714714,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16206279397010803,
"max": 0.20534056425094604,
"mean": -0.02111881598830223,
"std": 0.027917111292481422,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22404542565345764,
"max": 0.8422443866729736,
"mean": 0.4874877631664276,
"std": 0.07493799924850464,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.255166620016098,
"max": 0.305690199136734,
"mean": -6.7684013629332185e-06,
"std": 0.03347513824701309,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09524397552013397,
"max": 0.11034096777439117,
"mean": 6.5918720792979e-05,
"std": 0.026950189843773842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29684391617774963,
"max": 0.295682817697525,
"mean": 5.335842797649093e-05,
"std": 0.03254625201225281,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.156938552856445,
"max": 5.0772905349731445,
"mean": -0.014555896632373333,
"std": 1.1561553478240967,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.3448536694049835,
"max": 0.34325698018074036,
"mean": 7.860038749640808e-05,
"std": 0.0300619974732399,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03601115196943283,
"max": 0.03331650421023369,
"mean": -0.0001408920797985047,
"std": 0.013034623116254807,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31532466411590576,
"max": 0.3747538924217224,
"mean": -2.0682646209024824e-05,
"std": 0.024059493094682693,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10526668280363083,
"max": 0.12198653072118759,
"mean": -0.001968209631741047,
"std": 0.0288400761783123,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.3114672601222992,
"max": 1.1185976266860962,
"mean": 0.6660763025283813,
"std": 0.09736555069684982,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.872668981552124,
"max": 0.6275054216384888,
"mean": 0.0016755885444581509,
"std": 0.04743882641196251,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.2710971236228943,
"max": 0.03426326811313629,
"mean": -0.0465819425880909,
"std": 0.04054969921708107,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.922234833240509,
"max": 0.9643772840499878,
"mean": 0.0010214494541287422,
"std": 0.04070669412612915,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14429129660129547,
"max": 0.07484762370586395,
"mean": -0.00908473040908575,
"std": 0.025672495365142822,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.2402428686618805,
"max": 0.711609423160553,
"mean": 0.44710344076156616,
"std": 0.05906940996646881,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27207210659980774,
"max": 0.29753801226615906,
"mean": 9.350538675789721e-06,
"std": 0.035469669848680496,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11918215453624725,
"max": 0.1183757483959198,
"mean": 0.0007599537493661046,
"std": 0.027609599754214287,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2805421054363251,
"max": 0.2793859839439392,
"mean": -7.715764513704926e-05,
"std": 0.035099178552627563,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.506035566329956,
"max": 2.518012046813965,
"mean": 0.026713747531175613,
"std": 0.5862806439399719,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22091814875602722,
"max": 0.27132153511047363,
"mean": 2.8913418645970523e-06,
"std": 0.0307327788323164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03352135419845581,
"max": 0.03120853193104267,
"mean": 0.00011218251165701076,
"std": 0.012406233698129654,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.2351619005203247,
"max": 0.23147742450237274,
"mean": 5.6937635235954076e-05,
"std": 0.0256962887942791,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.1356453150510788,
"max": 0.1271977722644806,
"mean": -0.005494291428476572,
"std": 0.0399438738822937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3544028699398041,
"max": 1.1697261333465576,
"mean": 0.7103750109672546,
"std": 0.10338432341814041,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6172477006912231,
"max": 0.5542004108428955,
"mean": 0.001160221640020609,
"std": 0.046119727194309235,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18825410306453705,
"max": 0.024966172873973846,
"mean": -0.03482227772474289,
"std": 0.02857418917119503,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.130850911140442,
"max": 0.9707417488098145,
"mean": 0.0003595067828428,
"std": 0.042347487062215805,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5971466898918152,
"max": 0.06270916759967804,
"mean": -0.004877141211181879,
"std": 0.02859053947031498,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3752330243587494,
"max": 0.9386839866638184,
"mean": 0.5923458337783813,
"std": 0.06656130403280258,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3911682367324829,
"max": 0.3688437342643738,
"mean": 7.119165093172342e-05,
"std": 0.037188753485679626,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11875540018081665,
"max": 0.13628698885440826,
"mean": 0.0009287752327509224,
"std": 0.029227793216705322,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6185974478721619,
"max": 0.5083587169647217,
"mean": 1.5249222997226752e-05,
"std": 0.036442261189222336,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.17552661895752,
"max": 8.776671409606934,
"mean": -0.1091664582490921,
"std": 1.6969325542449951,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27638494968414307,
"max": 0.23973813652992249,
"mean": 5.3197330998955294e-05,
"std": 0.03261549770832062,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.051992662250995636,
"max": 0.03946495056152344,
"mean": 9.150505502475426e-05,
"std": 0.012954742647707462,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23067787289619446,
"max": 0.23443163931369781,
"mean": -2.1657757315551862e-05,
"std": 0.029391853138804436,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20401200652122498,
"max": 0.10544212907552719,
"mean": -0.004023304674774408,
"std": 0.0326065756380558,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.33983615040779114,
"max": 1.0106816291809082,
"mean": 0.7006407380104065,
"std": 0.09645594656467438,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5642791390419006,
"max": 0.832179069519043,
"mean": 0.00041513508767820895,
"std": 0.042302437126636505,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21134838461875916,
"max": 0.030589817091822624,
"mean": -0.032172758132219315,
"std": 0.026476319879293442,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7536408305168152,
"max": 0.717832088470459,
"mean": -9.409409358340781e-06,
"std": 0.03684220835566521,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2631220519542694,
"max": 0.10570736974477768,
"mean": -0.003029324347153306,
"std": 0.028848078101873398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.28446710109710693,
"max": 0.6937389373779297,
"mean": 0.49939653277397156,
"std": 0.04629269987344742,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27887189388275146,
"max": 0.23408503830432892,
"mean": -0.00011133109364891425,
"std": 0.03876320272684097,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15426576137542725,
"max": 0.1266399770975113,
"mean": -0.0022300498094409704,
"std": 0.0333842970430851,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.41348376870155334,
"max": 0.6593844294548035,
"mean": -1.978595719265286e-05,
"std": 0.039100244641304016,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.232041358947754,
"max": 4.715827465057373,
"mean": -0.020488303154706955,
"std": 1.0068391561508179,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24481239914894104,
"max": 0.2074868232011795,
"mean": 4.380439349915832e-05,
"std": 0.03396626561880112,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03449943661689758,
"max": 0.044728994369506836,
"mean": -1.8020247807726264e-05,
"std": 0.012624197639524937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.20050014555454254,
"max": 0.20566238462924957,
"mean": -2.9678063583560288e-05,
"std": 0.03102380409836769,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19964830577373505,
"max": 0.11326169967651367,
"mean": -0.00291792256757617,
"std": 0.03448895364999771,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.36708179116249084,
"max": 1.0548574924468994,
"mean": 0.6704699397087097,
"std": 0.06616173684597015,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.397816002368927,
"max": 0.5021188855171204,
"mean": -3.856579860439524e-05,
"std": 0.041137274354696274,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12784262001514435,
"max": 0.02675941213965416,
"mean": -0.030531462281942368,
"std": 0.02184327319264412,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.4485797882080078,
"max": 0.43235480785369873,
"mean": 8.378911297768354e-05,
"std": 0.034896139055490494,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26721277832984924,
"max": 0.07248232513666153,
"mean": -0.0011095060035586357,
"std": 0.023109637200832367,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.287344753742218,
"max": 0.6839542388916016,
"mean": 0.5244242548942566,
"std": 0.047291453927755356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22201856970787048,
"max": 0.22311273217201233,
"mean": 1.577789407747332e-05,
"std": 0.038952890783548355,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13627174496650696,
"max": 0.1090594157576561,
"mean": 0.00023713918926659971,
"std": 0.029215561226010323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.3747805953025818,
"max": 0.43678468465805054,
"mean": -9.573410352459177e-06,
"std": 0.03928905352950096,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.840266227722168,
"max": 4.992228984832764,
"mean": 0.009751387871801853,
"std": 0.8444771766662598,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22314536571502686,
"max": 0.21986283361911774,
"mean": -2.0974857761757448e-07,
"std": 0.034413520246744156,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.043581560254096985,
"max": 0.03578736633062363,
"mean": -0.00025875651044771075,
"std": 0.012076529674232006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21286383271217346,
"max": 0.18843913078308105,
"mean": -1.6783855244284496e-05,
"std": 0.03154028207063675,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18049854040145874,
"max": 0.12063688784837723,
"mean": -0.0024107899516820908,
"std": 0.04124762490391731,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4223836064338684,
"max": 0.9401367902755737,
"mean": 0.6626168489456177,
"std": 0.05654710531234741,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.3711914122104645,
"max": 0.4754900634288788,
"mean": -8.231064566643909e-05,
"std": 0.04089626669883728,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.2078404426574707,
"max": 0.02713177166879177,
"mean": -0.030231105163693428,
"std": 0.021318932995200157,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3397354185581207,
"max": 0.7327741384506226,
"mean": 8.48791969474405e-05,
"std": 0.03477150574326515,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.23985552787780762,
"max": 0.050368692725896835,
"mean": -0.0011948456522077322,
"std": 0.02045026607811451,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3060871660709381,
"max": 0.6523372530937195,
"mean": 0.5249941945075989,
"std": 0.04590437561273575,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.30396750569343567,
"max": 0.2171545922756195,
"mean": 7.000747427809983e-05,
"std": 0.03949857875704765,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14921154081821442,
"max": 0.1312280148267746,
"mean": 0.00034826344926841557,
"std": 0.030445020645856857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2569451630115509,
"max": 0.20191657543182373,
"mean": 3.105865835095756e-05,
"std": 0.03948771581053734,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.332984685897827,
"max": 2.372544527053833,
"mean": -0.026222502812743187,
"std": 0.44942858815193176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.1888340413570404,
"max": 0.21024198830127716,
"mean": 3.7197845813352615e-05,
"std": 0.03479824960231781,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.031675707548856735,
"max": 0.035443130880594254,
"mean": -0.00020022659737151116,
"std": 0.012285580858588219,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18818390369415283,
"max": 0.17026524245738983,
"mean": -6.799850234529004e-05,
"std": 0.032174814492464066,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13918116688728333,
"max": 0.13709498941898346,
"mean": -0.0025172303430736065,
"std": 0.05128452926874161,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4672186076641083,
"max": 0.9546743631362915,
"mean": 0.6688124537467957,
"std": 0.05250026285648346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32424914836883545,
"max": 0.3096342980861664,
"mean": -1.5644945960957557e-06,
"std": 0.04095214605331421,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12461961060762405,
"max": 0.02530832216143608,
"mean": -0.03069971315562725,
"std": 0.019789544865489006,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.43944308161735535,
"max": 0.4446093440055847,
"mean": 9.534660784993321e-05,
"std": 0.035124197602272034,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22425536811351776,
"max": 0.051573775708675385,
"mean": -0.001182063017040491,
"std": 0.018455415964126587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.3393731713294983,
"max": 0.737841010093689,
"mean": 0.5586089491844177,
"std": 0.04119626432657242,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.2723452150821686,
"max": 0.2782283425331116,
"mean": 1.9915583834517747e-05,
"std": 0.04106247052550316,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13683027029037476,
"max": 0.1396752893924713,
"mean": 0.0004885591333732009,
"std": 0.026614630594849586,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.49012690782546997,
"max": 0.35547417402267456,
"mean": 8.882825932232663e-05,
"std": 0.04070047289133072,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.2938547134399414,
"max": 1.7426533699035645,
"mean": -0.021057037636637688,
"std": 0.49975258111953735,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21735826134681702,
"max": 0.19773884117603302,
"mean": -4.063967935508117e-05,
"std": 0.03423747047781944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.041265569627285004,
"max": 0.03861430287361145,
"mean": -0.00014519633259624243,
"std": 0.012876993976533413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17728237807750702,
"max": 0.18350861966609955,
"mean": 4.7603076382074505e-05,
"std": 0.031560394912958145,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1796274185180664,
"max": 0.18359197676181793,
"mean": -0.0022178757935762405,
"std": 0.05480958893895149,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.47430306673049927,
"max": 1.0235347747802734,
"mean": 0.645234227180481,
"std": 0.05006485432386398,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2717384696006775,
"max": 0.3092706799507141,
"mean": 0.0001124507180065848,
"std": 0.04068849980831146,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10565188527107239,
"max": 0.026852920651435852,
"mean": -0.029502389952540398,
"std": 0.017905903980135918,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33881059288978577,
"max": 0.3287763297557831,
"mean": 5.716992018278688e-05,
"std": 0.03441813588142395,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.1814029961824417,
"max": 0.04198184236884117,
"mean": -0.0010715797543525696,
"std": 0.017202889546751976,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.32546839118003845,
"max": 0.6852879524230957,
"mean": 0.5111152529716492,
"std": 0.036710962653160095,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23360855877399445,
"max": 0.22551532089710236,
"mean": -3.5930093872593716e-05,
"std": 0.039181701838970184,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11516069620847702,
"max": 0.13141536712646484,
"mean": 0.00015141721814870834,
"std": 0.02916705049574375,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3523465394973755,
"max": 0.2849816083908081,
"mean": 7.249596819747239e-06,
"std": 0.039250195026397705,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.126643180847168,
"max": 3.538667678833008,
"mean": -0.011556778103113174,
"std": 0.681910514831543,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.2112656831741333,
"max": 0.20894697308540344,
"mean": 3.47470777342096e-05,
"std": 0.03448949381709099,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03565378487110138,
"max": 0.0480014868080616,
"mean": 0.0007942374795675278,
"std": 0.012850471772253513,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21031072735786438,
"max": 0.19297289848327637,
"mean": -1.2874927506345557e-06,
"std": 0.03169998526573181,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18637274205684662,
"max": 0.17692941427230835,
"mean": -0.0028488910757005215,
"std": 0.05860321223735809,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47467249631881714,
"max": 1.0397725105285645,
"mean": 0.6513394117355347,
"std": 0.049329087138175964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.248422771692276,
"max": 0.32902756333351135,
"mean": 0.00018066739721689373,
"std": 0.04057690501213074,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12427264451980591,
"max": 0.024594629183411598,
"mean": -0.030488643795251846,
"std": 0.017578164115548134,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.4205840826034546,
"max": 0.4813268184661865,
"mean": 2.129650965798646e-06,
"std": 0.035403117537498474,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15161579847335815,
"max": 0.043303120881319046,
"mean": 3.9640130125917494e-05,
"std": 0.014866231009364128,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3155551552772522,
"max": 0.6806549429893494,
"mean": 0.5528165102005005,
"std": 0.04051704332232475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.2062118798494339,
"max": 0.21964126825332642,
"mean": 3.0860355764161795e-05,
"std": 0.038303423672914505,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.1376407891511917,
"max": 0.11259414255619049,
"mean": 2.069001493509859e-05,
"std": 0.02579990215599537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40213435888290405,
"max": 0.3705216944217682,
"mean": 2.6252395400661044e-05,
"std": 0.03818526491522789,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.765413761138916,
"max": 2.86456298828125,
"mean": 0.0011342763900756836,
"std": 0.5163310766220093,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20278441905975342,
"max": 0.1972842514514923,
"mean": 2.9531782274716534e-05,
"std": 0.034300558269023895,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05089922249317169,
"max": 0.03997639939188957,
"mean": -0.00041936602792702615,
"std": 0.013420597650110722,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19621425867080688,
"max": 0.20147208869457245,
"mean": -1.2328569937380962e-05,
"std": 0.0318082757294178,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19283677637577057,
"max": 0.1948237270116806,
"mean": -0.002969849156215787,
"std": 0.06253352016210556,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34950727224349976,
"max": 1.081899642944336,
"mean": 0.6671000123023987,
"std": 0.05490493029356003,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22493921220302582,
"max": 0.2511034309864044,
"mean": 0.0003591308486647904,
"std": 0.04076593369245529,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09088904410600662,
"max": 0.04371574521064758,
"mean": -0.030075963586568832,
"std": 0.01758558303117752,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35314324498176575,
"max": 0.303651362657547,
"mean": -4.348178117652424e-05,
"std": 0.03712818771600723,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16180230677127838,
"max": 0.0634349063038826,
"mean": -8.249300299212337e-05,
"std": 0.019394585862755775,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34883353114128113,
"max": 0.7206243872642517,
"mean": 0.5422865748405457,
"std": 0.03884800896048546,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21920670568943024,
"max": 0.22291362285614014,
"mean": -1.1165878277097363e-05,
"std": 0.039236169308423996,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11826413869857788,
"max": 0.17058128118515015,
"mean": 0.0002835137420333922,
"std": 0.02510087564587593,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.2464587390422821,
"max": 0.3006129264831543,
"mean": -3.662023664219305e-05,
"std": 0.03893572464585304,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.4999661445617676,
"max": 3.709076166152954,
"mean": 0.015840880572795868,
"std": 0.7814859747886658,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.2185182124376297,
"max": 0.23746132850646973,
"mean": -1.3619632227346301e-05,
"std": 0.03630794584751129,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04712348431348801,
"max": 0.05133059248328209,
"mean": 0.00048102246364578605,
"std": 0.01351132895797491,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21373434364795685,
"max": 0.2173190861940384,
"mean": 5.650868115480989e-05,
"std": 0.033619917929172516,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21108141541481018,
"max": 0.23115544021129608,
"mean": -0.005106039810925722,
"std": 0.06184696406126022,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36205485463142395,
"max": 1.099104642868042,
"mean": 0.6992122530937195,
"std": 0.05326760187745094,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23436696827411652,
"max": 0.24465103447437286,
"mean": 0.00046349139302037656,
"std": 0.04127480834722519,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09793505817651749,
"max": 0.0681939497590065,
"mean": -0.03142588585615158,
"std": 0.0180974081158638,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.3012528717517853,
"max": 0.3511028289794922,
"mean": -8.16234532976523e-05,
"std": 0.04028059542179108,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15210135281085968,
"max": 0.14944450557231903,
"mean": 0.00025588623248040676,
"std": 0.023021480068564415,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031249936670064926,
"max": 0.031249839812517166,
"mean": -1.9292721844976768e-05,
"std": 0.01804409734904766,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031226642429828644,
"max": 0.03100142627954483,
"mean": -0.0010842883493751287,
"std": 0.01795371063053608,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031249966472387314,
"max": 0.031249895691871643,
"mean": 3.5441100862954045e-06,
"std": 0.018044503405690193,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031156372278928757,
"max": 0.031184475868940353,
"mean": 0.0003338930255267769,
"std": 0.018065759912133217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.031249985098838806,
"max": 0.031249992549419403,
"mean": -8.39352924231207e-06,
"std": 0.018043218180537224,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03124961629509926,
"max": 0.031239181756973267,
"mean": 0.00015365774743258953,
"std": 0.017994258552789688,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3829966187477112,
"max": 0.718121349811554,
"mean": 0.5806018114089966,
"std": 0.03862323611974716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23782959580421448,
"max": 0.1963561624288559,
"mean": 2.6626767066773027e-05,
"std": 0.03746971860527992,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11848776042461395,
"max": 0.1658152937889099,
"mean": 0.0009899433935061097,
"std": 0.027532605454325676,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2458610236644745,
"max": 0.5000857710838318,
"mean": -5.0437982281437144e-05,
"std": 0.037627607583999634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.936108350753784,
"max": 3.7635273933410645,
"mean": -0.003571532666683197,
"std": 0.6807447671890259,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.2272127866744995,
"max": 0.25125452876091003,
"mean": -1.1669091691146605e-05,
"std": 0.03743912652134895,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07160257548093796,
"max": 0.08056868612766266,
"mean": -0.0005193912656977773,
"std": 0.015654100105166435,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22808189690113068,
"max": 0.25764524936676025,
"mean": -2.8624439437408e-05,
"std": 0.03542578965425491,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.2000962197780609,
"max": 0.21490387618541718,
"mean": -0.0055319443345069885,
"std": 0.0682973712682724,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4052681028842926,
"max": 1.1870543956756592,
"mean": 0.7378469705581665,
"std": 0.05485502630472183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22090063989162445,
"max": 0.24591459333896637,
"mean": 0.0005211709067225456,
"std": 0.041342560201883316,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10329551994800568,
"max": 0.02418467588722706,
"mean": -0.03265417367219925,
"std": 0.0188569538295269,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.44879788160324097,
"max": 0.421781986951828,
"mean": -0.00043243536492809653,
"std": 0.046903904527425766,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25108596682548523,
"max": 0.46939900517463684,
"mean": 0.003194585908204317,
"std": 0.04450792446732521,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3169757127761841,
"max": 0.33316904306411743,
"mean": -2.5288825781899504e-05,
"std": 0.021290883421897888,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3246179223060608,
"max": 0.6840593218803406,
"mean": 0.5709414482116699,
"std": 0.04453985393047333,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16449199616909027,
"max": 0.17385058104991913,
"mean": -4.8540678108111024e-05,
"std": 0.033184703439474106,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18657186627388,
"max": 0.14269262552261353,
"mean": 3.6818586522713304e-05,
"std": 0.029670175164937973,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3801823556423187,
"max": 0.24568894505500793,
"mean": -1.0017960448749363e-05,
"std": 0.0327659472823143,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6502017974853516,
"max": 3.2850754261016846,
"mean": -0.014260413125157356,
"std": 0.9845133423805237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.2349099963903427,
"max": 0.2473423033952713,
"mean": -1.7784623196348548e-05,
"std": 0.04170290008187294,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07268015295267105,
"max": 0.1542970985174179,
"mean": 0.000663664482999593,
"std": 0.02515619620680809,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2664458751678467,
"max": 0.2483866959810257,
"mean": -1.5342577171395533e-05,
"std": 0.040143273770809174,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18931904435157776,
"max": 0.19443899393081665,
"mean": -0.0012288358993828297,
"std": 0.06666287034749985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32919859886169434,
"max": 0.997564435005188,
"mean": 0.7190552949905396,
"std": 0.051983967423439026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.2313733994960785,
"max": 0.24550800025463104,
"mean": 0.00018263014499098063,
"std": 0.04090628772974014,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11402574181556702,
"max": 0.018650896847248077,
"mean": -0.0424647182226181,
"std": 0.0188254714012146,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.3894314467906952,
"max": 0.4067791998386383,
"mean": -2.1846279196324758e-05,
"std": 0.048540692776441574,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.692162811756134,
"max": 0.4120035469532013,
"mean": 0.000852768833283335,
"std": 0.060242246836423874,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": 0.0,
"max": 1.0,
"mean": 0.00048828125,
"std": 0.0220916960388422,
"sparsity": 0.99951171875,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031249970197677612,
"max": 0.031249817460775375,
"mean": -2.1022657165303826e-05,
"std": 0.018035436049103737,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03122086077928543,
"max": 0.031233571469783783,
"mean": -0.0006771883927285671,
"std": 0.01782997138798237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03124987706542015,
"max": 0.031249921768903732,
"mean": -8.839062502374873e-06,
"std": 0.01803446188569069,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031232360750436783,
"max": 0.031245984137058258,
"mean": -0.0007298353011719882,
"std": 0.017944591119885445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03125,
"max": 0.031249988824129105,
"mean": 3.591749646147946e-06,
"std": 0.018040824681520462,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.031234480440616608,
"max": 0.031246982514858246,
"mean": 0.0001957040512934327,
"std": 0.018076537176966667,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23450319468975067,
"max": 0.2724616229534149,
"mean": 6.948144346097251e-06,
"std": 0.01881224475800991,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32128843665122986,
"max": 0.6922435760498047,
"mean": 0.5815606117248535,
"std": 0.045744746923446655,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18168264627456665,
"max": 0.1974717229604721,
"mean": -1.171275016531581e-05,
"std": 0.03318728506565094,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16043128073215485,
"max": 0.1292782723903656,
"mean": -0.0010662535205483437,
"std": 0.034117527306079865,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.3318951725959778,
"max": 0.31116846203804016,
"mean": -1.0326401024940424e-05,
"std": 0.03223801404237747,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.791203022003174,
"max": 8.74953842163086,
"mean": 0.09337067604064941,
"std": 1.61784029006958,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23363685607910156,
"max": 0.24183623492717743,
"mean": 4.133234324399382e-05,
"std": 0.0408620610833168,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07588791847229004,
"max": 0.0656837597489357,
"mean": 0.00047856790479272604,
"std": 0.01940334029495716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.2455652505159378,
"max": 0.2337566763162613,
"mean": -2.8880367608508095e-06,
"std": 0.03943672403693199,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16261433064937592,
"max": 0.1605682373046875,
"mean": 0.0016338212881237268,
"std": 0.06525633484125137,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5568146705627441,
"max": 0.9421050548553467,
"mean": 0.7127699851989746,
"std": 0.03979077190160751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22831875085830688,
"max": 0.2548784911632538,
"mean": -4.536488631856628e-05,
"std": 0.040581412613391876,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.13459284603595734,
"max": 0.02228192612528801,
"mean": -0.04134010896086693,
"std": 0.018355557695031166,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4211236536502838,
"max": 0.3922184407711029,
"mean": -4.3558138713706285e-06,
"std": 0.04779110848903656,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6065256595611572,
"max": 0.6503778696060181,
"mean": 0.0015810506884008646,
"std": 0.05679204687476158,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.2516687214374542,
"max": 0.3206498920917511,
"mean": -6.057634891476482e-06,
"std": 0.0196156594902277,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35995498299598694,
"max": 0.6810278296470642,
"mean": 0.5706292986869812,
"std": 0.042767371982336044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22037938237190247,
"max": 0.1769036501646042,
"mean": -3.467117130639963e-05,
"std": 0.03430242836475372,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16339237987995148,
"max": 0.23269455134868622,
"mean": 0.00036311167059466243,
"std": 0.03283863142132759,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2634328007698059,
"max": 0.23954781889915466,
"mean": -5.2383133152034134e-05,
"std": 0.03390158340334892,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.847443580627441,
"max": 5.083292484283447,
"mean": 0.043835077434778214,
"std": 1.227935552597046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24653136730194092,
"max": 0.25027644634246826,
"mean": 7.213905337266624e-05,
"std": 0.04399324953556061,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06254159659147263,
"max": 0.054444003850221634,
"mean": 0.000650427769869566,
"std": 0.017183585092425346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.28619009256362915,
"max": 0.2717132866382599,
"mean": -4.993668699171394e-05,
"std": 0.04299163073301315,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16040603816509247,
"max": 0.17025713622570038,
"mean": -0.0028844610787928104,
"std": 0.05926158279180527,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5196964740753174,
"max": 0.9310137629508972,
"mean": 0.7133955955505371,
"std": 0.03807961940765381,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23809659481048584,
"max": 0.24939550459384918,
"mean": 0.00046480150194838643,
"std": 0.04046152904629707,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14403879642486572,
"max": 0.041449662297964096,
"mean": -0.03967723995447159,
"std": 0.02051496133208275,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5321223735809326,
"max": 0.582199215888977,
"mean": 5.9441426856210455e-06,
"std": 0.04886837303638458,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5183588862419128,
"max": 0.49274152517318726,
"mean": 0.0023598431143909693,
"std": 0.053401440382003784,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.27355626225471497,
"max": 0.31514689326286316,
"mean": 1.8169534996559378e-06,
"std": 0.020052826032042503,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36634165048599243,
"max": 0.7102516293525696,
"mean": 0.5930806994438171,
"std": 0.04571138322353363,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21087931096553802,
"max": 0.1994456797838211,
"mean": 3.07354457618203e-05,
"std": 0.034868594259023666,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.1869715005159378,
"max": 0.20369935035705566,
"mean": 0.0009553421987220645,
"std": 0.0314984992146492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28932973742485046,
"max": 0.33943668007850647,
"mean": -4.7415778681170195e-05,
"std": 0.034589733928442,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8712191581726074,
"max": 3.3820998668670654,
"mean": 0.014444351196289062,
"std": 0.8576834797859192,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2242382913827896,
"max": 0.24965918064117432,
"mean": -4.0143440855899826e-06,
"std": 0.04223589971661568,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05498581379652023,
"max": 0.046769097447395325,
"mean": -1.842428173404187e-05,
"std": 0.015840334817767143,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2928566634654999,
"max": 0.29091376066207886,
"mean": -7.36157790015568e-06,
"std": 0.04195090010762215,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12467863410711288,
"max": 0.25901108980178833,
"mean": -0.003233879804611206,
"std": 0.05313729867339134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4561373293399811,
"max": 0.8428487777709961,
"mean": 0.7054461240768433,
"std": 0.03489769622683525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5113534331321716,
"max": 0.3484715223312378,
"mean": 0.0003426253970246762,
"std": 0.04020649194717407,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18678922951221466,
"max": 0.03952203318476677,
"mean": -0.03937358409166336,
"std": 0.02131999284029007,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5436691045761108,
"max": 0.5556817054748535,
"mean": -7.17876828275621e-05,
"std": 0.05074293538928032,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5110356211662292,
"max": 0.6633175015449524,
"mean": 0.002444919664412737,
"std": 0.04948664829134941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3323739171028137,
"max": 0.2654549777507782,
"mean": 3.673961600725306e-06,
"std": 0.019390413537621498,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32227811217308044,
"max": 0.7648001313209534,
"mean": 0.6509190201759338,
"std": 0.04508262872695923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24930793046951294,
"max": 0.21936655044555664,
"mean": -2.44708098762203e-06,
"std": 0.036502547562122345,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.32666686177253723,
"max": 0.2868551015853882,
"mean": -0.0006774846115149558,
"std": 0.03851696848869324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3097042739391327,
"max": 0.3694048821926117,
"mean": 6.485832273028791e-05,
"std": 0.03624315932393074,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.71013069152832,
"max": 5.798623085021973,
"mean": 0.03792855516076088,
"std": 1.41161048412323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22137394547462463,
"max": 0.20554855465888977,
"mean": -7.500727951992303e-05,
"std": 0.042491503059864044,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07746972888708115,
"max": 0.05126894265413284,
"mean": -0.0009250898147001863,
"std": 0.016401393339037895,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33084556460380554,
"max": 0.32904890179634094,
"mean": -4.916631951346062e-06,
"std": 0.042798250913619995,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2845572233200073,
"max": 0.11143017560243607,
"mean": -0.0012043914757668972,
"std": 0.04699280112981796,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48666608333587646,
"max": 0.885034441947937,
"mean": 0.7373895049095154,
"std": 0.03794779255986214,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3611343502998352,
"max": 0.27392831444740295,
"mean": 5.1206770876888186e-05,
"std": 0.04065323248505592,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.2472306787967682,
"max": 0.046531591564416885,
"mean": -0.03925502672791481,
"std": 0.023223698139190674,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.62546706199646,
"max": 0.596234142780304,
"mean": -6.186795508256182e-05,
"std": 0.0531260222196579,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7086492776870728,
"max": 0.2654070556163788,
"mean": 0.0009191531571559608,
"std": 0.05119417607784271,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.34331265091896057,
"max": 0.30340248346328735,
"mean": 2.3374013835564256e-07,
"std": 0.019139692187309265,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3500247001647949,
"max": 0.7813002467155457,
"mean": 0.6387312412261963,
"std": 0.048984214663505554,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20559599995613098,
"max": 0.20657846331596375,
"mean": -5.995870742481202e-05,
"std": 0.03769858554005623,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25827330350875854,
"max": 0.26797717809677124,
"mean": -0.00040583324152976274,
"std": 0.04458905756473541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.35375165939331055,
"max": 0.32213273644447327,
"mean": -7.335219379456248e-06,
"std": 0.03720685839653015,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.253459930419922,
"max": 4.198183536529541,
"mean": -0.0263908039778471,
"std": 1.0056793689727783,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23853513598442078,
"max": 0.24350698292255402,
"mean": -2.5575776817277074e-05,
"std": 0.04321583732962608,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06232254579663277,
"max": 0.05653427913784981,
"mean": 0.0003516775614116341,
"std": 0.014141896739602089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.437425822019577,
"max": 0.3736904561519623,
"mean": 1.4616346561524551e-05,
"std": 0.044127896428108215,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09596914798021317,
"max": 0.17601557075977325,
"mean": -0.0006586366798728704,
"std": 0.03512872755527496,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.42178472876548767,
"max": 1.06712007522583,
"mean": 0.7484290599822998,
"std": 0.04182668402791023,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26583534479141235,
"max": 0.29665902256965637,
"mean": -7.891673885751516e-05,
"std": 0.04081389307975769,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18455219268798828,
"max": 0.043140046298503876,
"mean": -0.03679502755403519,
"std": 0.0255513247102499,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.45756417512893677,
"max": 0.4861648976802826,
"mean": 4.3982381612295285e-05,
"std": 0.05422103777527809,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.2858409285545349,
"max": 0.5508930087089539,
"mean": -0.0008807203266769648,
"std": 0.047792647033929825,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2925868332386017,
"max": 0.32265621423721313,
"mean": 6.008186119288439e-06,
"std": 0.0199727825820446,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2913132309913635,
"max": 0.7585903406143188,
"mean": 0.6507112979888916,
"std": 0.05193017050623894,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24352194368839264,
"max": 0.26151588559150696,
"mean": -5.6967542150232475e-06,
"std": 0.03961416333913803,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.26712363958358765,
"max": 0.19983239471912384,
"mean": -0.0008771903812885284,
"std": 0.0517287477850914,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2718246877193451,
"max": 0.25335949659347534,
"mean": 5.239124220679514e-06,
"std": 0.03871086984872818,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.94522476196289,
"max": 15.922240257263184,
"mean": 0.03318937495350838,
"std": 1.9867888689041138,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20649555325508118,
"max": 0.22559243440628052,
"mean": -7.256461685756221e-05,
"std": 0.040558841079473495,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06932304799556732,
"max": 0.06304260343313217,
"mean": 0.0001579949603183195,
"std": 0.014740646816790104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.4653640687465668,
"max": 0.3200652003288269,
"mean": 1.952598540810868e-05,
"std": 0.04059439152479172,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06398282200098038,
"max": 0.11537733674049377,
"mean": 0.0011978133115917444,
"std": 0.02469516545534134,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3749999403953552,
"max": 0.9300609230995178,
"mean": 0.7510109543800354,
"std": 0.040018972009420395,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27868181467056274,
"max": 0.27277180552482605,
"mean": -0.00016834630514495075,
"std": 0.041004978120326996,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19812321662902832,
"max": 0.05135354399681091,
"mean": -0.032012395560741425,
"std": 0.025048717856407166,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.65754234790802,
"max": 0.5349372029304504,
"mean": -5.049940591561608e-05,
"std": 0.052857208997011185,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.1923648864030838,
"max": 0.5813060998916626,
"mean": -0.0005128913326188922,
"std": 0.041049525141716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.417529821395874,
"max": 0.3719121813774109,
"mean": 6.524643140437547e-06,
"std": 0.021627992391586304,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21460720896720886,
"max": 0.7452309131622314,
"mean": 0.6493626832962036,
"std": 0.054172683507204056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20914840698242188,
"max": 0.19524669647216797,
"mean": 4.0109844121616334e-05,
"std": 0.03945964202284813,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32907912135124207,
"max": 0.25925326347351074,
"mean": -0.003227418288588524,
"std": 0.05623279884457588,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.20563212037086487,
"max": 0.25434860587120056,
"mean": 5.404070907388814e-05,
"std": 0.038562316447496414,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.2339768409729,
"max": 6.921723365783691,
"mean": 0.04828859120607376,
"std": 1.383695363998413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20957675576210022,
"max": 0.23022468388080597,
"mean": -4.7416378947673365e-06,
"std": 0.04131784662604332,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.043760623782873154,
"max": 0.03593071922659874,
"mean": -6.6086213337257504e-06,
"std": 0.012794941663742065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3974460959434509,
"max": 0.3449029326438904,
"mean": -5.5259803048102185e-05,
"std": 0.0423947237432003,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055080167949199677,
"max": 0.06271716207265854,
"mean": 0.0003585012163966894,
"std": 0.018664730712771416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3508152663707733,
"max": 1.0430189371109009,
"mean": 0.789574146270752,
"std": 0.048565711826086044,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3336288034915924,
"max": 0.38612979650497437,
"mean": -0.00016904372023418546,
"std": 0.041490498930215836,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15726615488529205,
"max": 0.05897233635187149,
"mean": -0.031808022409677505,
"std": 0.02507229521870613,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6961155533790588,
"max": 0.4685930609703064,
"mean": -8.521115523763001e-05,
"std": 0.05180642008781433,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24746476113796234,
"max": 0.32834842801094055,
"mean": -0.00026278701261617243,
"std": 0.041423212736845016,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2869253158569336,
"max": 0.35028234124183655,
"mean": -2.780619524855865e-06,
"std": 0.02424117736518383,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.1968069076538086,
"max": 0.7775169014930725,
"mean": 0.6701230406761169,
"std": 0.058515764772892,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.2286878526210785,
"max": 0.23117558658123016,
"mean": -2.085552659991663e-05,
"std": 0.04044000059366226,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.2196890264749527,
"max": 0.24058501422405243,
"mean": 0.0007775035337544978,
"std": 0.05580567941069603,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21652470529079437,
"max": 0.2261732518672943,
"mean": -7.23175035091117e-05,
"std": 0.03937419131398201,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.891955375671387,
"max": 9.054566383361816,
"mean": -0.0012135691940784454,
"std": 1.846129059791565,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2690034806728363,
"max": 0.25858405232429504,
"mean": 4.355451528681442e-05,
"std": 0.03841076418757439,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.057884324342012405,
"max": 0.05789237469434738,
"mean": 0.0003543176280800253,
"std": 0.014708762988448143,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.263511061668396,
"max": 0.288027822971344,
"mean": -6.177674367791042e-05,
"std": 0.03907754644751549,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.044037725776433945,
"max": 0.037295691668987274,
"mean": -9.799870167626068e-05,
"std": 0.013339235447347164,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.339274525642395,
"max": 1.0903433561325073,
"mean": 0.8638954162597656,
"std": 0.06374805420637131,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4230613112449646,
"max": 0.41900894045829773,
"mean": 0.00031366912298835814,
"std": 0.043512988835573196,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.21445079147815704,
"max": 0.17045123875141144,
"mean": -0.029427748173475266,
"std": 0.03184095025062561,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5979012846946716,
"max": 0.559224545955658,
"mean": -0.00014804149395786226,
"std": 0.053461432456970215,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17863567173480988,
"max": 0.3767751455307007,
"mean": 0.0013495876919478178,
"std": 0.037288032472133636,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3942491412162781,
"max": 0.3687455952167511,
"mean": 3.7661615351680666e-05,
"std": 0.028617454692721367,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2907008230686188,
"max": 0.8258129358291626,
"mean": 0.7054593563079834,
"std": 0.06773429363965988,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9265665411949158,
"max": 1.0269814729690552,
"mean": -2.791242877719924e-05,
"std": 0.04764382541179657,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8779393434524536,
"max": 0.8145599365234375,
"mean": -0.0002924790605902672,
"std": 0.09544122219085693,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.27007606625556946,
"max": 0.24068056046962738,
"mean": -2.2448431991506368e-05,
"std": 0.038949914276599884,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.705463409423828,
"max": 22.81535530090332,
"mean": -0.09178592264652252,
"std": 4.064526081085205,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.2275296449661255,
"max": 0.2455320507287979,
"mean": -2.5536401153658517e-05,
"std": 0.03864150494337082,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06007588282227516,
"max": 0.045354753732681274,
"mean": -0.00013596308417618275,
"std": 0.014683394692838192,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33782336115837097,
"max": 0.3746013939380646,
"mean": 7.420163456117734e-06,
"std": 0.04082043468952179,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.046125710010528564,
"max": 0.19506430625915527,
"mean": 0.0002738517359830439,
"std": 0.013541821390390396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.373764306306839,
"max": 1.1280238628387451,
"mean": 0.8901123404502869,
"std": 0.06384868174791336,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44741326570510864,
"max": 0.5422499775886536,
"mean": 2.5218110749847256e-05,
"std": 0.045580700039863586,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22342835366725922,
"max": 0.08723597973585129,
"mean": -0.03199537843465805,
"std": 0.03770318627357483,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7254156470298767,
"max": 0.6879446506500244,
"mean": 3.628328340710141e-05,
"std": 0.05179440602660179,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.174102783203125,
"max": 0.2178839147090912,
"mean": 3.535003634169698e-05,
"std": 0.03175075352191925,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.33916032314300537,
"max": 0.37271323800086975,
"mean": 4.308380448492244e-05,
"std": 0.034135378897190094,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3176645338535309,
"max": 1.2846463918685913,
"mean": 0.6014195084571838,
"std": 0.08323279023170471,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2829808294773102,
"max": 0.26017650961875916,
"mean": -3.0644375783595024e-06,
"std": 0.035980723798274994,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23540745675563812,
"max": 0.20547473430633545,
"mean": 0.0002399118966422975,
"std": 0.056001532822847366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43518391251564026,
"max": 0.32444700598716736,
"mean": 2.422756006126292e-05,
"std": 0.03412417694926262,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.537700176239014,
"max": 7.30228853225708,
"mean": -0.007349951192736626,
"std": 0.6983441114425659,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.34386035799980164,
"max": 0.3621582090854645,
"mean": 0.00010323335300199687,
"std": 0.04783642664551735,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07365774363279343,
"max": 0.060269735753536224,
"mean": 0.0009362755226902664,
"std": 0.014931198209524155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2561565041542053,
"max": 0.2865042984485626,
"mean": 4.9739428504835814e-06,
"std": 0.04156460985541344,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.055231235921382904,
"max": 0.06271004676818848,
"mean": 0.00012724015687126666,
"std": 0.0071450709365308285,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.49412763118743896,
"max": 1.2182179689407349,
"mean": 1.0133787393569946,
"std": 0.11725164949893951,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0939558744430542,
"max": 1.0474863052368164,
"mean": -4.8846173740457743e-05,
"std": 0.052417904138565063,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22328178584575653,
"max": 0.172784686088562,
"mean": -0.02721056528389454,
"std": 0.0362662672996521,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8832080960273743,
"max": 0.9217195510864258,
"mean": -0.00014604278840124607,
"std": 0.05329865962266922,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.1707809567451477,
"max": 0.3790228068828583,
"mean": 0.003364440519362688,
"std": 0.03984135016798973,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7773804068565369,
"max": 0.7221406698226929,
"mean": 1.8065227777697146e-05,
"std": 0.04615423083305359,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.33866187930107117,
"max": 1.425328254699707,
"mean": 0.9481796622276306,
"std": 0.20640140771865845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7458044290542603,
"max": 1.704500436782837,
"mean": 0.00022708994220010936,
"std": 0.15870554745197296,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.19757080078125,
"max": 1.0991984605789185,
"mean": -0.009535851888358593,
"std": 0.2035919725894928,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4207988381385803,
"max": 0.4279989004135132,
"mean": 6.386132736224681e-05,
"std": 0.04802023991942406,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.71625328063965,
"max": 19.51169776916504,
"mean": -0.24800625443458557,
"std": 4.769559860229492,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.3236338496208191,
"max": 0.438272625207901,
"mean": -1.1853735486511141e-05,
"std": 0.04616710543632507,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03371698036789894,
"max": 0.03678824380040169,
"mean": 0.0006397695397026837,
"std": 0.0129077835008502,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7031863331794739,
"max": 0.6687424182891846,
"mean": 4.257483305991627e-05,
"std": 0.057892125099897385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.0722241997718811,
"max": 0.0676589161157608,
"mean": -0.0001341316383332014,
"std": 0.012878631241619587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38035547733306885,
"max": 1.3902052640914917,
"mean": 1.066498041152954,
"std": 0.21949008107185364,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6164002418518066,
"max": 0.7182905673980713,
"mean": 0.00011321296915411949,
"std": 0.05802781134843826,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.2184617668390274,
"max": 0.22462666034698486,
"mean": 0.006169781554490328,
"std": 0.04965030029416084,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6297575831413269,
"max": 0.8895801901817322,
"mean": 1.2445923857740127e-05,
"std": 0.023545311763882637,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.506031334400177,
"max": 0.47297078371047974,
"mean": -0.0030135007109493017,
"std": 0.0691458210349083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5383259057998657,
"max": 1.1772801876068115,
"mean": 0.7824772596359253,
"std": 0.09824033081531525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26664498448371887,
"max": 0.2126948982477188,
"mean": -0.00022273289505392313,
"std": 0.05400582030415535,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23798410594463348,
"max": 0.014864158816635609,
"mean": -0.04389958456158638,
"std": 0.03423725813627243,
"sparsity": 0.0,
"shape": [
100
]
}
}
}