k15 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
622becd verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43053385615348816,
"max": 0.2987181544303894,
"mean": -0.0025508857797831297,
"std": 0.04255492985248566,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06311193853616714,
"max": 0.10768741369247437,
"mean": 0.0006200151983648539,
"std": 0.03410356491804123,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41268399357795715,
"max": 0.8365581035614014,
"mean": -0.00020668540673796088,
"std": 0.02410803735256195,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11543754488229752,
"max": 0.3218643069267273,
"mean": -0.0009378742543049157,
"std": 0.019571715965867043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7987403869628906,
"max": 2.8775689601898193,
"mean": -0.0003620539791882038,
"std": 0.6153795123100281,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.2798178493976593,
"max": 0.38195931911468506,
"mean": 0.0004235386732034385,
"std": 0.04274815320968628,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22243480384349823,
"max": 0.20970797538757324,
"mean": -0.004494894295930862,
"std": 0.04093479365110397,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.42797601222991943,
"max": 0.47545987367630005,
"mean": 3.68623682334146e-06,
"std": 0.024507373571395874,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32538774609565735,
"max": 0.15757951140403748,
"mean": -0.046732865273952484,
"std": 0.05161404609680176,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4106280207633972,
"max": 0.35474810004234314,
"mean": -0.000128601081087254,
"std": 0.02359883114695549,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22982768714427948,
"max": 0.2626851797103882,
"mean": -0.029157839715480804,
"std": 0.04937523230910301,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2546607255935669,
"max": 0.8210369348526001,
"mean": 0.5255380868911743,
"std": 0.08102277666330338,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.29706501960754395,
"max": 0.26598596572875977,
"mean": -0.0004244564042892307,
"std": 0.03210071846842766,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09280094504356384,
"max": 0.12531320750713348,
"mean": 0.0006500966264866292,
"std": 0.025744492188096046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2907920181751251,
"max": 0.2819848656654358,
"mean": -7.519756036344916e-05,
"std": 0.030932072550058365,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.9063615798950195,
"max": 5.821039199829102,
"mean": -0.009349350817501545,
"std": 1.2963582277297974,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.4252597391605377,
"max": 0.3442302644252777,
"mean": 9.807890455704182e-05,
"std": 0.029951980337500572,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.02886926755309105,
"max": 0.027612265199422836,
"mean": -0.0003159886400680989,
"std": 0.012566552497446537,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.4545641541481018,
"max": 0.4486750364303589,
"mean": 2.288275572936982e-05,
"std": 0.023853391408920288,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08885892480611801,
"max": 0.09123405814170837,
"mean": 0.002273206366226077,
"std": 0.019519906491041183,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.26680853962898254,
"max": 1.0574053525924683,
"mean": 0.5312761068344116,
"std": 0.10467371344566345,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.575035810470581,
"max": 0.6089199781417847,
"mean": -0.00043114880099892616,
"std": 0.03859530761837959,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18226587772369385,
"max": 0.04570382833480835,
"mean": -0.029475372284650803,
"std": 0.04265210032463074,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.1675195693969727,
"max": 1.6349984407424927,
"mean": 0.00032014260068535805,
"std": 0.02769290842115879,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16255705058574677,
"max": 0.20596350729465485,
"mean": -0.021122729405760765,
"std": 0.0279533751308918,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.2242354154586792,
"max": 0.8446622490882874,
"mean": 0.4876382350921631,
"std": 0.07536358386278152,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2560102641582489,
"max": 0.3063015341758728,
"mean": -8.342660294147208e-06,
"std": 0.03346908837556839,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09542153775691986,
"max": 0.11059843748807907,
"mean": 6.575271254405379e-05,
"std": 0.026967303827404976,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29774004220962524,
"max": 0.296736478805542,
"mean": 5.098901965538971e-05,
"std": 0.03253892436623573,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.170334339141846,
"max": 5.090466022491455,
"mean": -0.014626836404204369,
"std": 1.1584166288375854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34492507576942444,
"max": 0.3436436355113983,
"mean": 7.888609980000183e-05,
"std": 0.030058223754167557,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03619777783751488,
"max": 0.033210255205631256,
"mean": -0.00014313205610960722,
"std": 0.013021216727793217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.31573694944381714,
"max": 0.37568625807762146,
"mean": -2.092823342536576e-05,
"std": 0.024055050686001778,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10554195195436478,
"max": 0.12217912822961807,
"mean": -0.001965724630281329,
"std": 0.02885899320244789,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31185638904571533,
"max": 1.1226844787597656,
"mean": 0.6664173007011414,
"std": 0.09809636324644089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8725683689117432,
"max": 0.6277270317077637,
"mean": 0.001675453968346119,
"std": 0.04743659123778343,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27133694291114807,
"max": 0.034276124089956284,
"mean": -0.04661266878247261,
"std": 0.04062533751130104,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9219098687171936,
"max": 0.9648231863975525,
"mean": 0.0010219970718026161,
"std": 0.04070163145661354,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.14473342895507812,
"max": 0.07504827529191971,
"mean": -0.009093794040381908,
"std": 0.025712795555591583,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23969869315624237,
"max": 0.7134895920753479,
"mean": 0.4472740888595581,
"std": 0.05947508662939072,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.273428350687027,
"max": 0.2982955574989319,
"mean": 8.738919859752059e-06,
"std": 0.03547058627009392,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11919642984867096,
"max": 0.11864279955625534,
"mean": 0.0007499873172491789,
"std": 0.027633123099803925,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2816983759403229,
"max": 0.2803042232990265,
"mean": -7.669557089684531e-05,
"std": 0.03509991616010666,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.514967679977417,
"max": 2.5269885063171387,
"mean": 0.026808204129338264,
"std": 0.587342381477356,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22164961695671082,
"max": 0.27225953340530396,
"mean": 2.8316171665210277e-06,
"std": 0.03073127381503582,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.033373475074768066,
"max": 0.031244782730937004,
"mean": 0.00011742905917344615,
"std": 0.012399322353303432,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23557811975479126,
"max": 0.23209546506404877,
"mean": 5.68200193811208e-05,
"std": 0.025696886703372,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13604120910167694,
"max": 0.1282019019126892,
"mean": -0.005500663537532091,
"std": 0.0399833545088768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3545033931732178,
"max": 1.174311876296997,
"mean": 0.7105965614318848,
"std": 0.10393685102462769,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6180550456047058,
"max": 0.555590033531189,
"mean": 0.0011606597108766437,
"std": 0.046113595366477966,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.1892954707145691,
"max": 0.024854592978954315,
"mean": -0.034856364130973816,
"std": 0.028640495613217354,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1327383518218994,
"max": 0.972023606300354,
"mean": 0.00035934254992753267,
"std": 0.04234174266457558,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5991520285606384,
"max": 0.06305119395256042,
"mean": -0.004881403874605894,
"std": 0.02864677459001541,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.3754131495952606,
"max": 0.9439838528633118,
"mean": 0.5927106142044067,
"std": 0.0675281211733818,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3920239806175232,
"max": 0.36984747648239136,
"mean": 7.029663538560271e-05,
"std": 0.03718528896570206,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11922823637723923,
"max": 0.13680268824100494,
"mean": 0.0009289362351410091,
"std": 0.029231850057840347,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6204254031181335,
"max": 0.5099692940711975,
"mean": 1.5338478988269344e-05,
"std": 0.03643814101815224,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.20434856414795,
"max": 8.80774211883545,
"mean": -0.10939832031726837,
"std": 1.7015736103057861,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.2770227789878845,
"max": 0.2399866282939911,
"mean": 5.2325925935292616e-05,
"std": 0.032612308859825134,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.05187734216451645,
"max": 0.039576977491378784,
"mean": 9.007145126815885e-05,
"std": 0.01296569500118494,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23111766576766968,
"max": 0.23510430753231049,
"mean": -2.2175441699801013e-05,
"std": 0.029389016330242157,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20455272495746613,
"max": 0.10541031509637833,
"mean": -0.0040219868533313274,
"std": 0.03264109417796135,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3396739959716797,
"max": 1.0156350135803223,
"mean": 0.7007465362548828,
"std": 0.09685582667589188,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5659961700439453,
"max": 0.8350182771682739,
"mean": 0.0004152171895839274,
"std": 0.042294152081012726,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.2122603803873062,
"max": 0.03037133999168873,
"mean": -0.03219597041606903,
"std": 0.026528161019086838,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7565364837646484,
"max": 0.7206384539604187,
"mean": -1.6425212379544973e-05,
"std": 0.03683505579829216,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2637326717376709,
"max": 0.10635162889957428,
"mean": -0.003013473004102707,
"std": 0.028875315561890602,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2841384708881378,
"max": 0.6960581541061401,
"mean": 0.4994935393333435,
"std": 0.046687543392181396,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27911630272865295,
"max": 0.23450130224227905,
"mean": -0.00011085053847637028,
"std": 0.038756489753723145,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15422014892101288,
"max": 0.1267157793045044,
"mean": -0.0022325206082314253,
"std": 0.03337828069925308,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.4152379035949707,
"max": 0.6604457497596741,
"mean": -1.880790659924969e-05,
"std": 0.0390951968729496,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.242863178253174,
"max": 4.727988243103027,
"mean": -0.020436234772205353,
"std": 1.0083643198013306,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24566201865673065,
"max": 0.2078404426574707,
"mean": 4.393987182993442e-05,
"std": 0.03396216034889221,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.03461671993136406,
"max": 0.04490647837519646,
"mean": -1.8480626749806106e-05,
"std": 0.012636142782866955,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.201488196849823,
"max": 0.20679476857185364,
"mean": -2.9119719329173677e-05,
"std": 0.03102005459368229,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.2001321166753769,
"max": 0.11347545683383942,
"mean": -0.0028973689768463373,
"std": 0.03452814370393753,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3668859004974365,
"max": 1.0606576204299927,
"mean": 0.6705638766288757,
"std": 0.06651072949171066,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.3992147445678711,
"max": 0.5030191540718079,
"mean": -3.829112756648101e-05,
"std": 0.04113021492958069,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12906894087791443,
"max": 0.02686660923063755,
"mean": -0.030545957386493683,
"std": 0.02190822921693325,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.45001378655433655,
"max": 0.43416494131088257,
"mean": 7.559473306173459e-05,
"std": 0.03489038348197937,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.26795703172683716,
"max": 0.07305809110403061,
"mean": -0.0010922406800091267,
"std": 0.023138197138905525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28738605976104736,
"max": 0.6873639225959778,
"mean": 0.5246094465255737,
"std": 0.04773576930165291,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.2230275720357895,
"max": 0.22428689897060394,
"mean": 1.5606414308422245e-05,
"std": 0.03894846886396408,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.1365431845188141,
"max": 0.1094546914100647,
"mean": 0.0002404236583970487,
"std": 0.02924003079533577,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.375844806432724,
"max": 0.4382041096687317,
"mean": -9.796498488867655e-06,
"std": 0.039285723119974136,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.8503658771514893,
"max": 5.0051727294921875,
"mean": 0.009742870926856995,
"std": 0.8458123803138733,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22342294454574585,
"max": 0.22070662677288055,
"mean": -2.869974196073599e-07,
"std": 0.03440912440419197,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04364994913339615,
"max": 0.03587768226861954,
"mean": -0.00025836972054094076,
"std": 0.012079192325472832,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21351207792758942,
"max": 0.18924757838249207,
"mean": -1.7089078028220683e-05,
"std": 0.03153553605079651,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.18106123805046082,
"max": 0.12093079835176468,
"mean": -0.0023932361509650946,
"std": 0.04127350077033043,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4227260649204254,
"max": 0.9448354244232178,
"mean": 0.662743330001831,
"std": 0.05696980655193329,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37132784724235535,
"max": 0.4766311049461365,
"mean": -8.210691157728434e-05,
"std": 0.0408891923725605,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20899881422519684,
"max": 0.027237456291913986,
"mean": -0.03024902194738388,
"std": 0.02138604037463665,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.3416992723941803,
"max": 0.735672652721405,
"mean": 8.195172995328903e-05,
"std": 0.03476583957672119,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.2404329776763916,
"max": 0.05046902596950531,
"mean": -0.001188310096040368,
"std": 0.020469345152378082,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.3061925768852234,
"max": 0.654449999332428,
"mean": 0.5251765251159668,
"std": 0.04624079912900925,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3049762547016144,
"max": 0.21794484555721283,
"mean": 7.015730807324871e-05,
"std": 0.03949474170804024,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14950910210609436,
"max": 0.13137659430503845,
"mean": 0.000338978337822482,
"std": 0.030483614653348923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2578408122062683,
"max": 0.20263631641864777,
"mean": 3.113361162832007e-05,
"std": 0.03948460891842842,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.339005708694458,
"max": 2.378676176071167,
"mean": -0.026260126382112503,
"std": 0.45006638765335083,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18907499313354492,
"max": 0.21106310188770294,
"mean": 3.715493221534416e-05,
"std": 0.03479326516389847,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.031842123717069626,
"max": 0.03563522920012474,
"mean": -0.00019889514078386128,
"std": 0.012288383208215237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.18906620144844055,
"max": 0.17065204679965973,
"mean": -6.830548954894766e-05,
"std": 0.032169949263334274,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13967929780483246,
"max": 0.13765227794647217,
"mean": -0.0025106696411967278,
"std": 0.051296915858983994,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.4670410752296448,
"max": 0.9571460485458374,
"mean": 0.668942928314209,
"std": 0.052938032895326614,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32453829050064087,
"max": 0.3093876242637634,
"mean": -9.305285857408307e-07,
"std": 0.04094512388110161,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12521511316299438,
"max": 0.025563344359397888,
"mean": -0.030704183503985405,
"std": 0.01984286867082119,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.4407917857170105,
"max": 0.4464106857776642,
"mean": 9.500519081484526e-05,
"std": 0.03511863574385643,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.22505341470241547,
"max": 0.051904987543821335,
"mean": -0.0011818333296105266,
"std": 0.018484966829419136,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33911338448524475,
"max": 0.7404670715332031,
"mean": 0.5587128400802612,
"std": 0.04148301109671593,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27348393201828003,
"max": 0.2790311872959137,
"mean": 2.0330318875494413e-05,
"std": 0.041056688874959946,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13706564903259277,
"max": 0.14011380076408386,
"mean": 0.0004902533255517483,
"std": 0.026642272248864174,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4915930926799774,
"max": 0.35670116543769836,
"mean": 8.893256745068356e-05,
"std": 0.04069444537162781,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.300570249557495,
"max": 1.7478224039077759,
"mean": -0.021113090217113495,
"std": 0.5004414319992065,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.21829766035079956,
"max": 0.19811730086803436,
"mean": -4.052483200212009e-05,
"std": 0.034232448786497116,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.041401587426662445,
"max": 0.038982585072517395,
"mean": -0.00013965339167043567,
"std": 0.012888636440038681,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.17814268171787262,
"max": 0.1835789680480957,
"mean": 4.7900641220621765e-05,
"std": 0.031555790454149246,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1802123337984085,
"max": 0.1839253157377243,
"mean": -0.0022146895062178373,
"std": 0.05485367402434349,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.47431865334510803,
"max": 1.0268715620040894,
"mean": 0.6453023552894592,
"std": 0.05052410438656807,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.27204346656799316,
"max": 0.30987846851348877,
"mean": 0.00011226898641325533,
"std": 0.04068146273493767,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.10556552559137344,
"max": 0.02664870023727417,
"mean": -0.02952779270708561,
"std": 0.017950357869267464,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.3395896553993225,
"max": 0.3302164077758789,
"mean": 5.2438736020121723e-05,
"std": 0.03441261127591133,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18205386400222778,
"max": 0.04234303906559944,
"mean": -0.0010605738498270512,
"std": 0.01722128316760063,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3254714906215668,
"max": 0.6875306367874146,
"mean": 0.5112907886505127,
"std": 0.03710601106286049,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23404580354690552,
"max": 0.22564062476158142,
"mean": -3.628679769462906e-05,
"std": 0.03917597234249115,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.1157650500535965,
"max": 0.13217955827713013,
"mean": 0.00015458319103345275,
"std": 0.02921123616397381,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3531610369682312,
"max": 0.28566646575927734,
"mean": 7.01215958542889e-06,
"std": 0.03924458101391792,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.1371684074401855,
"max": 3.5479142665863037,
"mean": -0.011608399450778961,
"std": 0.6831862926483154,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.2113451212644577,
"max": 0.20978450775146484,
"mean": 3.466297494014725e-05,
"std": 0.03448467701673508,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.0358961820602417,
"max": 0.04827914386987686,
"mean": 0.000792390201240778,
"std": 0.012867480516433716,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21074581146240234,
"max": 0.19335627555847168,
"mean": -1.3081223642075201e-06,
"std": 0.031695783138275146,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18677425384521484,
"max": 0.17732204496860504,
"mean": -0.002835639752447605,
"std": 0.05864328145980835,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.47452113032341003,
"max": 1.0454236268997192,
"mean": 0.651544451713562,
"std": 0.05015648156404495,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.2484469711780548,
"max": 0.3293426036834717,
"mean": 0.00018075718253385276,
"std": 0.04056986793875694,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12511543929576874,
"max": 0.024807237088680267,
"mean": -0.03050871379673481,
"std": 0.017624877393245697,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42189329862594604,
"max": 0.4829460680484772,
"mean": -1.433467332390137e-06,
"std": 0.0353967621922493,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.1517147570848465,
"max": 0.043470486998558044,
"mean": 4.9440553993918e-05,
"std": 0.014891887083649635,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.31546592712402344,
"max": 0.6829473972320557,
"mean": 0.552940845489502,
"std": 0.0407881923019886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.20687490701675415,
"max": 0.22027458250522614,
"mean": 3.187588299624622e-05,
"std": 0.03829912096261978,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.1380588412284851,
"max": 0.11287239193916321,
"mean": 2.8096917958464473e-05,
"std": 0.025843404233455658,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40360599756240845,
"max": 0.37176549434661865,
"mean": 2.5846293283393607e-05,
"std": 0.03817979246377945,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7753050327301025,
"max": 2.8720550537109375,
"mean": 0.001174271572381258,
"std": 0.5172262787818909,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.203634575009346,
"max": 0.19783173501491547,
"mean": 2.9641731089213863e-05,
"std": 0.034296903759241104,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.050782062113285065,
"max": 0.039943333715200424,
"mean": -0.00042034429498016834,
"std": 0.01341927982866764,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.1968999058008194,
"max": 0.20258377492427826,
"mean": -1.2486772902775556e-05,
"std": 0.03180483356118202,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19323131442070007,
"max": 0.19526611268520355,
"mean": -0.002963971346616745,
"std": 0.06255338340997696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.34893983602523804,
"max": 1.0871814489364624,
"mean": 0.6672742962837219,
"std": 0.05565904080867767,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22620373964309692,
"max": 0.251870721578598,
"mean": 0.00035865549580194056,
"std": 0.040759552270174026,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09141312539577484,
"max": 0.043738093227148056,
"mean": -0.03009146638214588,
"std": 0.017630403861403465,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.3538018465042114,
"max": 0.30474764108657837,
"mean": -4.393163908389397e-05,
"std": 0.03712210804224014,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.16189776360988617,
"max": 0.06336814165115356,
"mean": -8.093340147752315e-05,
"std": 0.019419532269239426,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.34863826632499695,
"max": 0.7244340777397156,
"mean": 0.5424437522888184,
"std": 0.039265505969524384,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21948550641536713,
"max": 0.22342580556869507,
"mean": -1.1189426913915668e-05,
"std": 0.039230361580848694,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11868271231651306,
"max": 0.17081572115421295,
"mean": 0.00028613023459911346,
"std": 0.025137728080153465,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.24677123129367828,
"max": 0.30096495151519775,
"mean": -3.686630952870473e-05,
"std": 0.03892983868718147,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.5091044902801514,
"max": 3.718792676925659,
"mean": 0.01584971882402897,
"std": 0.7831407189369202,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21897639334201813,
"max": 0.23756206035614014,
"mean": -1.3331029549590312e-05,
"std": 0.036302708089351654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.047262854874134064,
"max": 0.05141079053282738,
"mean": 0.00047719862777739763,
"std": 0.013516917824745178,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.2142534703016281,
"max": 0.21756578981876373,
"mean": 5.647125362884253e-05,
"std": 0.03361497074365616,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21157103776931763,
"max": 0.23160234093666077,
"mean": -0.005100839305669069,
"std": 0.06188952922821045,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.3621518015861511,
"max": 1.1046018600463867,
"mean": 0.6994094252586365,
"std": 0.0540287047624588,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23531383275985718,
"max": 0.24546286463737488,
"mean": 0.0004634457582142204,
"std": 0.0412684828042984,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.0981631875038147,
"max": 0.06831478327512741,
"mean": -0.031439878046512604,
"std": 0.01814098283648491,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.30264386534690857,
"max": 0.3523462414741516,
"mean": -8.214355329982936e-05,
"std": 0.0402742475271225,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15258914232254028,
"max": 0.14998860657215118,
"mean": 0.0002567686606198549,
"std": 0.023048948496580124,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 0.9985297322273254,
"max": 1.007304310798645,
"mean": 0.9998952150344849,
"std": 0.0011818050406873226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031265806406736374,
"max": 0.0312703475356102,
"mean": -1.928816709551029e-05,
"std": 0.018041111528873444,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031225843355059624,
"max": 0.030984507873654366,
"mean": -0.001084179850295186,
"std": 0.01795078068971634,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031264401972293854,
"max": 0.03126936033368111,
"mean": 3.5438486065686448e-06,
"std": 0.018041551113128662,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031160537153482437,
"max": 0.031171930953860283,
"mean": 0.00033398409141227603,
"std": 0.01806296594440937,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0006154034635983407,
"max": 0.00041452725417912006,
"mean": 1.3732544630329357e-06,
"std": 0.00013773542013950646,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9981350898742676,
"max": 1.0061345100402832,
"mean": 1.0003111362457275,
"std": 0.0018558463780209422,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.032749444246292114,
"max": 0.03284144029021263,
"mean": -6.684205800411291e-06,
"std": 0.01804272271692753,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.03275851905345917,
"max": 0.03259003907442093,
"mean": -0.00013117710477672517,
"std": 0.017956379801034927,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.0011779898777604103,
"max": 0.001155506120994687,
"mean": 3.63817605375516e-07,
"std": 0.00021426456805784255,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.0005257476586848497,
"max": 0.0003992951533291489,
"mean": 2.2647066089120926e-06,
"std": 0.00012679416977334768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.3831771910190582,
"max": 0.7203002572059631,
"mean": 0.5807632207870483,
"std": 0.039030127227306366,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.238657608628273,
"max": 0.1965981125831604,
"mean": 2.6105446522706188e-05,
"std": 0.03746547922492027,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11904074251651764,
"max": 0.16665399074554443,
"mean": 0.0009819172555580735,
"std": 0.027577750384807587,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2464642971754074,
"max": 0.5006471276283264,
"mean": -5.0186910812044516e-05,
"std": 0.03762289881706238,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.946474552154541,
"max": 3.7734150886535645,
"mean": -0.0035824859514832497,
"std": 0.681806743144989,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22754359245300293,
"max": 0.25217491388320923,
"mean": -1.1530558367667254e-05,
"std": 0.03743445873260498,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07182253897190094,
"max": 0.0808083638548851,
"mean": -0.000513089878950268,
"std": 0.015668950974941254,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22810040414333344,
"max": 0.2579977512359619,
"mean": -2.8758044209098443e-05,
"std": 0.03542134538292885,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20080700516700745,
"max": 0.2153109759092331,
"mean": -0.005534037947654724,
"std": 0.0683637484908104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4053238332271576,
"max": 1.1908336877822876,
"mean": 0.7380030155181885,
"std": 0.05547412484884262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.2215573787689209,
"max": 0.24592049419879913,
"mean": 0.000521159905474633,
"std": 0.041335850954055786,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10345371812582016,
"max": 0.024234607815742493,
"mean": -0.032675523310899734,
"std": 0.018910475075244904,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4504980742931366,
"max": 0.42334607243537903,
"mean": -0.0004341673047747463,
"std": 0.04689619690179825,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.2517021596431732,
"max": 0.4706237316131592,
"mean": 0.0032027317211031914,
"std": 0.04455312713980675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3171294331550598,
"max": 0.33335307240486145,
"mean": -2.5211516913259402e-05,
"std": 0.021287426352500916,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.3245099186897278,
"max": 0.6862163543701172,
"mean": 0.5710394978523254,
"std": 0.04481911659240723,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.1647811233997345,
"max": 0.1747460514307022,
"mean": -4.884982990915887e-05,
"std": 0.03318081423640251,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.18714644014835358,
"max": 0.1431918442249298,
"mean": 4.32572269346565e-05,
"std": 0.029710030183196068,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3816376030445099,
"max": 0.24683159589767456,
"mean": -9.986059922084678e-06,
"std": 0.0327618382871151,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6597650051116943,
"max": 3.293627977371216,
"mean": -0.014285150915384293,
"std": 0.9855467677116394,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23503181338310242,
"max": 0.24772128462791443,
"mean": -1.80145725607872e-05,
"std": 0.04169723764061928,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07281922549009323,
"max": 0.1548185795545578,
"mean": 0.0006660926737822592,
"std": 0.025179805234074593,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.2665092945098877,
"max": 0.2483654022216797,
"mean": -1.536182753625326e-05,
"std": 0.04013803228735924,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18998344242572784,
"max": 0.1951427161693573,
"mean": -0.0012352201156318188,
"std": 0.06669348478317261,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32910633087158203,
"max": 1.0014653205871582,
"mean": 0.7192941308021545,
"std": 0.05263138189911842,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23228409886360168,
"max": 0.24597151577472687,
"mean": 0.00018284631369169801,
"std": 0.040899865329265594,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11449356377124786,
"max": 0.019026821479201317,
"mean": -0.042487140744924545,
"std": 0.018874552100896835,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.39081379771232605,
"max": 0.4084374010562897,
"mean": -2.154261528630741e-05,
"std": 0.04853346198797226,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6939337849617004,
"max": 0.4130322337150574,
"mean": 0.0008477974915876985,
"std": 0.06032131612300873,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.0010364858899265528,
"max": 1.000504493713379,
"mean": 0.00048820534721016884,
"std": 0.022089021280407906,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 0.997757077217102,
"max": 1.0054128170013428,
"mean": 0.9996482133865356,
"std": 0.0006391748902387917,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031263865530490875,
"max": 0.03126693516969681,
"mean": -2.1029807612649165e-05,
"std": 0.018032483756542206,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.031225642189383507,
"max": 0.031231923028826714,
"mean": -0.000677043863106519,
"std": 0.017827108502388,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.031264521181583405,
"max": 0.03126373142004013,
"mean": -8.835060725687072e-06,
"std": 0.018031509593129158,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031228171661496162,
"max": 0.031247133389115334,
"mean": -0.0007299243006855249,
"std": 0.017942015081644058,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.00041853971197269857,
"max": 0.0003325868456158787,
"mean": -3.1447550554730697e-06,
"std": 0.0001163617562269792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9978469014167786,
"max": 1.0084865093231201,
"mean": 1.0002028942108154,
"std": 0.002608145819976926,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.03244105726480484,
"max": 0.03237903118133545,
"mean": -1.7311865576630225e-06,
"std": 0.018027927726507187,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.032130636274814606,
"max": 0.03116563893854618,
"mean": -0.0003740063984878361,
"std": 0.01804370991885662,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0012820950942113996,
"max": 0.0011165018659085035,
"mean": -8.955282169154088e-07,
"std": 0.00020968639000784606,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.0003423716698307544,
"max": 0.00029734382405877113,
"mean": -3.7682302718167193e-06,
"std": 0.00010476629540789872,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23448625206947327,
"max": 0.272605836391449,
"mean": 6.777544967917493e-06,
"std": 0.018809372559189796,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32130253314971924,
"max": 0.6949947476387024,
"mean": 0.5816991329193115,
"std": 0.04608374834060669,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.18193963170051575,
"max": 0.19776132702827454,
"mean": -1.1586925211304333e-05,
"std": 0.033183593302965164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16079005599021912,
"max": 0.12958164513111115,
"mean": -0.0010761492885649204,
"std": 0.03415785729885101,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33248019218444824,
"max": 0.31138068437576294,
"mean": -1.0150852176593617e-05,
"std": 0.0322343148291111,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.811703681945801,
"max": 8.77199935913086,
"mean": 0.09351971745491028,
"std": 1.6208088397979736,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23395448923110962,
"max": 0.24196705222129822,
"mean": 4.150588938500732e-05,
"std": 0.04085612669587135,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07607380300760269,
"max": 0.06586506962776184,
"mean": 0.0004828007658943534,
"std": 0.01941879838705063,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24638578295707703,
"max": 0.23463943600654602,
"mean": -3.1122344807954505e-06,
"std": 0.03943074867129326,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.16335651278495789,
"max": 0.16123652458190918,
"mean": 0.001627025194466114,
"std": 0.0652812197804451,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5570001006126404,
"max": 0.9467727541923523,
"mean": 0.7130157351493835,
"std": 0.04052889347076416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22856345772743225,
"max": 0.2556101679801941,
"mean": -4.5706547098234296e-05,
"std": 0.040574584156274796,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1351189911365509,
"max": 0.02213732711970806,
"mean": -0.04135933890938759,
"std": 0.018408460542559624,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4225083887577057,
"max": 0.3927571773529053,
"mean": -4.4740827433997765e-06,
"std": 0.04778379574418068,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6081869602203369,
"max": 0.6523037552833557,
"mean": 0.0015862288419157267,
"std": 0.0568697564303875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.25164270401000977,
"max": 0.32068535685539246,
"mean": -6.094380296417512e-06,
"std": 0.019612763077020645,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.3596651554107666,
"max": 0.6836386322975159,
"mean": 0.5707623958587646,
"std": 0.04307318106293678,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.2204994410276413,
"max": 0.17691564559936523,
"mean": -3.469674993539229e-05,
"std": 0.034298643469810486,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16342805325984955,
"max": 0.23329652845859528,
"mean": 0.0003627383557613939,
"std": 0.03284167870879173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.2643727660179138,
"max": 0.2404099404811859,
"mean": -5.280954064801335e-05,
"std": 0.03389745578169823,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.859966278076172,
"max": 5.0964674949646,
"mean": 0.04393793269991875,
"std": 1.230094075202942,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.2466304451227188,
"max": 0.25078442692756653,
"mean": 7.233464566525072e-05,
"std": 0.04398677870631218,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.06267692148685455,
"max": 0.054532695561647415,
"mean": 0.000642440456431359,
"std": 0.017191536724567413,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.28690966963768005,
"max": 0.27239924669265747,
"mean": -5.01475042256061e-05,
"std": 0.04298488423228264,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.1612030565738678,
"max": 0.1705736219882965,
"mean": -0.0028862706385552883,
"std": 0.05929599329829216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.519792377948761,
"max": 0.9359998106956482,
"mean": 0.7136070132255554,
"std": 0.03880562260746956,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.23828113079071045,
"max": 0.24893540143966675,
"mean": 0.0004648254835046828,
"std": 0.040453579276800156,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.14526499807834625,
"max": 0.041103385388851166,
"mean": -0.03970393165946007,
"std": 0.02056412398815155,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5338290929794312,
"max": 0.5837586522102356,
"mean": 5.762096407124773e-06,
"std": 0.04885942488908768,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5195844769477844,
"max": 0.4939325749874115,
"mean": 0.002366485306993127,
"std": 0.05347662419080734,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.273802787065506,
"max": 0.3155968487262726,
"mean": 2.01077523342974e-06,
"std": 0.02004941552877426,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.36614885926246643,
"max": 0.7128685116767883,
"mean": 0.5932222604751587,
"std": 0.04609934985637665,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21129509806632996,
"max": 0.19956757128238678,
"mean": 3.06197653117124e-05,
"std": 0.034865960478782654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.18738499283790588,
"max": 0.20401518046855927,
"mean": 0.0009546762448735535,
"std": 0.031527843326330185,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.2900747060775757,
"max": 0.3402419686317444,
"mean": -4.711254223366268e-05,
"std": 0.03458685800433159,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.881408214569092,
"max": 3.3909339904785156,
"mean": 0.014485932886600494,
"std": 0.8588526248931885,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.22496825456619263,
"max": 0.2504532039165497,
"mean": -3.7677732507290784e-06,
"std": 0.04222949594259262,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05539275333285332,
"max": 0.046729691326618195,
"mean": -1.6585952835157514e-05,
"std": 0.01585092395544052,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.29304519295692444,
"max": 0.2904603183269501,
"mean": -7.356060450547375e-06,
"std": 0.04194435849785805,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12514667212963104,
"max": 0.25951117277145386,
"mean": -0.003241210710257292,
"std": 0.05318121612071991,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4564977288246155,
"max": 0.847152590751648,
"mean": 0.7056270837783813,
"std": 0.03555477410554886,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5119993090629578,
"max": 0.3481258749961853,
"mean": 0.0003428043273743242,
"std": 0.04019870236515999,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18613915145397186,
"max": 0.03958306089043617,
"mean": -0.03939869999885559,
"std": 0.021371137350797653,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5454614162445068,
"max": 0.5573456287384033,
"mean": -7.15605856385082e-05,
"std": 0.05073413625359535,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5122924447059631,
"max": 0.6649084091186523,
"mean": 0.002443553414195776,
"std": 0.04954148083925247,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.3326307237148285,
"max": 0.2655903100967407,
"mean": 3.417561856622342e-06,
"std": 0.01938662678003311,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.32189854979515076,
"max": 0.7676428556442261,
"mean": 0.6510834097862244,
"std": 0.045412834733724594,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.24963903427124023,
"max": 0.21975325047969818,
"mean": -2.1360538084991276e-06,
"std": 0.03650053218007088,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3272336423397064,
"max": 0.2872598171234131,
"mean": -0.000690902175847441,
"std": 0.038575589656829834,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.3108454644680023,
"max": 0.3709103763103485,
"mean": 6.501353345811367e-05,
"std": 0.036241017282009125,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.7285308837890625,
"max": 5.821481227874756,
"mean": 0.03798262029886246,
"std": 1.4149147272109985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22184839844703674,
"max": 0.20582044124603271,
"mean": -7.514897151850164e-05,
"std": 0.04248502478003502,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07779642939567566,
"max": 0.05152571201324463,
"mean": -0.0009286667918786407,
"std": 0.016416585072875023,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.33085039258003235,
"max": 0.3292792737483978,
"mean": -4.624932898877887e-06,
"std": 0.04279141500592232,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.2853098511695862,
"max": 0.11214260756969452,
"mean": -0.001206133747473359,
"std": 0.0470227487385273,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.48610714077949524,
"max": 0.8880516886711121,
"mean": 0.7374852299690247,
"std": 0.038454823195934296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3625198006629944,
"max": 0.274814248085022,
"mean": 5.1260511099826545e-05,
"std": 0.040644217282533646,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24789389967918396,
"max": 0.046399183571338654,
"mean": -0.0392770953476429,
"std": 0.023303059861063957,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.6276291012763977,
"max": 0.5983994007110596,
"mean": -6.147650128696114e-05,
"std": 0.053116291761398315,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7109575271606445,
"max": 0.2664211392402649,
"mean": 0.0009173410944640636,
"std": 0.05126515030860901,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.3433755040168762,
"max": 0.30368152260780334,
"mean": 1.5963701116561424e-07,
"std": 0.01913503371179104,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.34989097714424133,
"max": 0.7839252948760986,
"mean": 0.6388714909553528,
"std": 0.04933994635939598,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20559623837471008,
"max": 0.20719166100025177,
"mean": -5.992387013975531e-05,
"std": 0.03769540786743164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25907090306282043,
"max": 0.2685673236846924,
"mean": -0.00039763032691553235,
"std": 0.04464223235845566,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.35467270016670227,
"max": 0.3229817748069763,
"mean": -6.9561183408950455e-06,
"std": 0.03720381483435631,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.267129421234131,
"max": 4.20892858505249,
"mean": -0.02641383744776249,
"std": 1.0074299573898315,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.23873740434646606,
"max": 0.24359266459941864,
"mean": -2.525941454223357e-05,
"std": 0.04320967569947243,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06238892674446106,
"max": 0.056785948574543,
"mean": 0.0003448878414928913,
"std": 0.014156854711472988,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43733710050582886,
"max": 0.3737178444862366,
"mean": 1.443843029846903e-05,
"std": 0.044121142476797104,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09657814353704453,
"max": 0.1761663407087326,
"mean": -0.0006602209759876132,
"std": 0.03516199812293053,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4218268096446991,
"max": 1.070821762084961,
"mean": 0.7484229803085327,
"std": 0.042183347046375275,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.2668064832687378,
"max": 0.2973981201648712,
"mean": -7.947084668558091e-05,
"std": 0.0408041812479496,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18554465472698212,
"max": 0.04366818815469742,
"mean": -0.03683188557624817,
"std": 0.025637373328208923,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.4576941728591919,
"max": 0.4877614378929138,
"mean": 4.342636384535581e-05,
"std": 0.05420947074890137,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.28702330589294434,
"max": 0.5525704622268677,
"mean": -0.0008802832453511655,
"std": 0.04786703363060951,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2927917540073395,
"max": 0.32283690571784973,
"mean": 6.15146973359515e-06,
"std": 0.019968591630458832,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.2908625304698944,
"max": 0.7625526785850525,
"mean": 0.650852382183075,
"std": 0.052188921719789505,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24394108355045319,
"max": 0.2618102431297302,
"mean": -5.981732101645321e-06,
"std": 0.0396115742623806,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.267729252576828,
"max": 0.20025481283664703,
"mean": -0.0008811865700408816,
"std": 0.05178782343864441,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.27242225408554077,
"max": 0.25395235419273376,
"mean": 4.551842721411958e-06,
"std": 0.03870858997106552,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.978915214538574,
"max": 15.964410781860352,
"mean": 0.033282238990068436,
"std": 1.9907665252685547,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20757102966308594,
"max": 0.2263997346162796,
"mean": -7.214213110273704e-05,
"std": 0.04055347666144371,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06938357651233673,
"max": 0.06327643245458603,
"mean": 0.00015629694098606706,
"std": 0.014746708795428276,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46517401933670044,
"max": 0.320604145526886,
"mean": 1.968832475540694e-05,
"std": 0.040588606148958206,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06422771513462067,
"max": 0.11537671089172363,
"mean": 0.0011921785771846771,
"std": 0.024717185646295547,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.3747407793998718,
"max": 0.935266375541687,
"mean": 0.750953733921051,
"std": 0.040338218212127686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.2802576720714569,
"max": 0.2736368775367737,
"mean": -0.00016840582247823477,
"std": 0.04099476709961891,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19888785481452942,
"max": 0.05115103721618652,
"mean": -0.0320354662835598,
"std": 0.025122012943029404,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6593934297561646,
"max": 0.5366666913032532,
"mean": -4.888622788712382e-05,
"std": 0.05284604802727699,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19323143362998962,
"max": 0.5829473733901978,
"mean": -0.0005128738121129572,
"std": 0.041099581867456436,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41776803135871887,
"max": 0.3719577491283417,
"mean": 6.155986739031505e-06,
"std": 0.02162076160311699,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21425622701644897,
"max": 0.7496172189712524,
"mean": 0.6495488882064819,
"std": 0.054406262934207916,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20992936193943024,
"max": 0.1961071640253067,
"mean": 4.025327507406473e-05,
"std": 0.039461661130189896,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.3297625780105591,
"max": 0.25971850752830505,
"mean": -0.003232162445783615,
"std": 0.05629448592662811,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2059866487979889,
"max": 0.25485166907310486,
"mean": 5.424032860901207e-05,
"std": 0.0385642871260643,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.252347469329834,
"max": 6.942240238189697,
"mean": 0.0483565516769886,
"std": 1.3863071203231812,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.21015214920043945,
"max": 0.2306891679763794,
"mean": -5.141047040524427e-06,
"std": 0.041312482208013535,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04387219622731209,
"max": 0.036041487008333206,
"mean": 6.907794158905745e-07,
"std": 0.012801294215023518,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.3976612091064453,
"max": 0.3448401689529419,
"mean": -5.557302574743517e-05,
"std": 0.04238886013627052,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.055147796869277954,
"max": 0.06285040080547333,
"mean": 0.00036463249125517905,
"std": 0.018676765263080597,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.3504812717437744,
"max": 1.0465654134750366,
"mean": 0.7894250154495239,
"std": 0.048819400370121,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.3337032198905945,
"max": 0.3862806558609009,
"mean": -0.00016953393060248345,
"std": 0.04147983714938164,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.1576613336801529,
"max": 0.0590929239988327,
"mean": -0.03184548765420914,
"std": 0.02515709400177002,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6968328952789307,
"max": 0.469901978969574,
"mean": -8.902316039893776e-05,
"std": 0.051792412996292114,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24871186912059784,
"max": 0.32932594418525696,
"mean": -0.0002525809977669269,
"std": 0.04146667197346687,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.2871420085430145,
"max": 0.35027819871902466,
"mean": -2.14410374610452e-06,
"std": 0.024236002936959267,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19654829800128937,
"max": 0.7817674279212952,
"mean": 0.6702600121498108,
"std": 0.058710552752017975,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.22942325472831726,
"max": 0.2315986454486847,
"mean": -1.993781370401848e-05,
"std": 0.04043741896748543,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.22012899816036224,
"max": 0.24119356274604797,
"mean": 0.0007787380600348115,
"std": 0.0558554045855999,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21590574085712433,
"max": 0.22671166062355042,
"mean": -7.169770105974749e-05,
"std": 0.03937356546521187,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.917876243591309,
"max": 9.080994606018066,
"mean": -0.001221940852701664,
"std": 1.850203514099121,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2697039842605591,
"max": 0.2592160999774933,
"mean": 4.3639320210786536e-05,
"std": 0.03840581700205803,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.057751722633838654,
"max": 0.05785238742828369,
"mean": 0.0003506582579575479,
"std": 0.014723116531968117,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.26493385434150696,
"max": 0.28856679797172546,
"mean": -6.166309321997687e-05,
"std": 0.0390719398856163,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04392173886299133,
"max": 0.037354789674282074,
"mean": -9.023403254104778e-05,
"std": 0.013362305238842964,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.339423805475235,
"max": 1.0940691232681274,
"mean": 0.8637771010398865,
"std": 0.06392761319875717,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.42344674468040466,
"max": 0.419131875038147,
"mean": 0.0003126289520878345,
"std": 0.04350034520030022,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.2149772197008133,
"max": 0.1709900051355362,
"mean": -0.02949333004653454,
"std": 0.03195162117481232,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.6006098985671997,
"max": 0.5608966946601868,
"mean": -0.00015077056013979018,
"std": 0.05344511568546295,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17873013019561768,
"max": 0.3772476017475128,
"mean": 0.001360590336844325,
"std": 0.03732540085911751,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3945685923099518,
"max": 0.3692152500152588,
"mean": 3.696953717735596e-05,
"std": 0.02861735410988331,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2903454899787903,
"max": 0.8293581604957581,
"mean": 0.7055460214614868,
"std": 0.0678996667265892,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9263197779655457,
"max": 1.0265021324157715,
"mean": -2.6120340407942422e-05,
"std": 0.04762475937604904,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8807425498962402,
"max": 0.8172140717506409,
"mean": -0.00030884621082805097,
"std": 0.09569496661424637,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2697499990463257,
"max": 0.24099533259868622,
"mean": -2.2782449377700686e-05,
"std": 0.03895165026187897,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.798847198486328,
"max": 22.90509796142578,
"mean": -0.0919695645570755,
"std": 4.078832626342773,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22787398099899292,
"max": 0.24508967995643616,
"mean": -2.5707324311952107e-05,
"std": 0.038637157529592514,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06037602946162224,
"max": 0.04592515528202057,
"mean": -0.00014296159497462213,
"std": 0.01469582598656416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.33830153942108154,
"max": 0.3749238848686218,
"mean": 7.406164513668045e-06,
"std": 0.04081294313073158,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.04650312289595604,
"max": 0.19583187997341156,
"mean": 0.00027365636196918786,
"std": 0.01356838084757328,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.37443819642066956,
"max": 1.133804440498352,
"mean": 0.8900732398033142,
"std": 0.06407663971185684,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.44803717732429504,
"max": 0.5431130528450012,
"mean": 2.468598904670216e-05,
"std": 0.045565586537122726,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22437457740306854,
"max": 0.08822718262672424,
"mean": -0.03203187137842178,
"std": 0.037792954593896866,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7267696261405945,
"max": 0.6905267834663391,
"mean": 3.431630102568306e-05,
"std": 0.05177779868245125,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17477792501449585,
"max": 0.2187574803829193,
"mean": 4.145095590502024e-05,
"std": 0.03179146349430084,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.34067779779434204,
"max": 0.37430673837661743,
"mean": 4.298752173781395e-05,
"std": 0.034139689058065414,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3176548182964325,
"max": 1.2885946035385132,
"mean": 0.6015164256095886,
"std": 0.08361472934484482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2833174467086792,
"max": 0.2604674696922302,
"mean": -2.836968405972584e-06,
"std": 0.0359807163476944,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.23581622540950775,
"max": 0.20569506287574768,
"mean": 0.00023786764359101653,
"std": 0.05603973567485809,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.4358341097831726,
"max": 0.3255886137485504,
"mean": 2.4293056412716396e-05,
"std": 0.03413134440779686,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.55698823928833,
"max": 7.328329086303711,
"mean": -0.007412843406200409,
"std": 0.7006030082702637,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3444630801677704,
"max": 0.36411502957344055,
"mean": 0.00010332845704397187,
"std": 0.04782791808247566,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07393012195825577,
"max": 0.06051904335618019,
"mean": 0.0009339260286651552,
"std": 0.014950446784496307,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.2559783458709717,
"max": 0.2868276536464691,
"mean": 4.447174433153123e-06,
"std": 0.041554734110832214,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05538920685648918,
"max": 0.06289947777986526,
"mean": 0.0001379675231873989,
"std": 0.007169328164309263,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4936121106147766,
"max": 1.2250889539718628,
"mean": 1.0134532451629639,
"std": 0.11746872216463089,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.0937550067901611,
"max": 1.0471408367156982,
"mean": -4.919863567920402e-05,
"std": 0.05240846797823906,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.22377891838550568,
"max": 0.1730729043483734,
"mean": -0.0272611565887928,
"std": 0.036391731351614,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8862237930297852,
"max": 0.9243613481521606,
"mean": -0.000145945290569216,
"std": 0.05328156799077988,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.1714460700750351,
"max": 0.38068291544914246,
"mean": 0.0033734007738530636,
"std": 0.03993367776274681,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.7782041430473328,
"max": 0.7242955565452576,
"mean": 1.8867685867007822e-05,
"std": 0.046160779893398285,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.338652104139328,
"max": 1.4327832460403442,
"mean": 0.9483770728111267,
"std": 0.20681361854076385,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.7457636594772339,
"max": 1.704433560371399,
"mean": 0.00022719459957443178,
"std": 0.15868502855300903,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.2020785808563232,
"max": 1.102237343788147,
"mean": -0.009557764045894146,
"std": 0.20423445105552673,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4211972653865814,
"max": 0.42695388197898865,
"mean": 6.460870645241812e-05,
"std": 0.04801572859287262,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.786317825317383,
"max": 19.58098602294922,
"mean": -0.24868716299533844,
"std": 4.785643100738525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32400673627853394,
"max": 0.4385600686073303,
"mean": -1.1902460755663924e-05,
"std": 0.046161260455846786,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03411344811320305,
"max": 0.03715973347425461,
"mean": 0.000642350991256535,
"std": 0.012920677661895752,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7036018371582031,
"max": 0.6655198335647583,
"mean": 4.3310083128744736e-05,
"std": 0.057881489396095276,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07239808887243271,
"max": 0.06768179684877396,
"mean": -0.0001333777909167111,
"std": 0.012929531745612621,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.38025927543640137,
"max": 1.3922340869903564,
"mean": 1.0665740966796875,
"std": 0.21970504522323608,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6167835593223572,
"max": 0.7178800106048584,
"mean": 0.00011188755161128938,
"std": 0.05802030861377716,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21991026401519775,
"max": 0.22539444267749786,
"mean": 0.006232057698071003,
"std": 0.049761686474084854,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6298967599868774,
"max": 0.8895401954650879,
"mean": 1.17591189336963e-05,
"std": 0.023527691140770912,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5078860521316528,
"max": 0.47492364048957825,
"mean": -0.0030241229105740786,
"std": 0.0694146603345871,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5377801060676575,
"max": 1.1812876462936401,
"mean": 0.7827885746955872,
"std": 0.09896031767129898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.2672213613986969,
"max": 0.21292650699615479,
"mean": -0.00022339042334351689,
"std": 0.05399598926305771,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23856915533542633,
"max": 0.014836194925010204,
"mean": -0.043973349034786224,
"std": 0.03437991812825203,
"sparsity": 0.0,
"shape": [
100
]
}
}
}