ck9 / model_analysis.json
memevis's picture
Upload folder using huggingface_hub
98d8463 verified
{
"layer_types": {
"transformer": 391
},
"parameter_counts": {
"transformer.time_embed.time_mlp.0.weight": 262144,
"transformer.time_embed.time_mlp.0.bias": 1024,
"transformer.time_embed.time_mlp.2.weight": 1048576,
"transformer.time_embed.time_mlp.2.bias": 1024,
"transformer.text_embed.text_embed.weight": 254600,
"transformer.input_embed.proj.weight": 307200,
"transformer.input_embed.proj.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": 1024,
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": 2031616,
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": 1024,
"transformer.layers.0.1.g": 1024,
"transformer.layers.0.2.to_q.weight": 1048576,
"transformer.layers.0.2.to_q.bias": 1024,
"transformer.layers.0.2.to_k.weight": 1048576,
"transformer.layers.0.2.to_k.bias": 1024,
"transformer.layers.0.2.to_v.weight": 1048576,
"transformer.layers.0.2.to_v.bias": 1024,
"transformer.layers.0.2.to_out.0.weight": 1048576,
"transformer.layers.0.2.to_out.0.bias": 1024,
"transformer.layers.0.3.g": 1024,
"transformer.layers.0.4.ff.0.0.weight": 4194304,
"transformer.layers.0.4.ff.0.0.bias": 4096,
"transformer.layers.0.4.ff.2.weight": 4194304,
"transformer.layers.0.4.ff.2.bias": 1024,
"transformer.layers.1.1.g": 1024,
"transformer.layers.1.2.to_q.weight": 1048576,
"transformer.layers.1.2.to_q.bias": 1024,
"transformer.layers.1.2.to_k.weight": 1048576,
"transformer.layers.1.2.to_k.bias": 1024,
"transformer.layers.1.2.to_v.weight": 1048576,
"transformer.layers.1.2.to_v.bias": 1024,
"transformer.layers.1.2.to_out.0.weight": 1048576,
"transformer.layers.1.2.to_out.0.bias": 1024,
"transformer.layers.1.3.g": 1024,
"transformer.layers.1.4.ff.0.0.weight": 4194304,
"transformer.layers.1.4.ff.0.0.bias": 4096,
"transformer.layers.1.4.ff.2.weight": 4194304,
"transformer.layers.1.4.ff.2.bias": 1024,
"transformer.layers.2.1.g": 1024,
"transformer.layers.2.2.to_q.weight": 1048576,
"transformer.layers.2.2.to_q.bias": 1024,
"transformer.layers.2.2.to_k.weight": 1048576,
"transformer.layers.2.2.to_k.bias": 1024,
"transformer.layers.2.2.to_v.weight": 1048576,
"transformer.layers.2.2.to_v.bias": 1024,
"transformer.layers.2.2.to_out.0.weight": 1048576,
"transformer.layers.2.2.to_out.0.bias": 1024,
"transformer.layers.2.3.g": 1024,
"transformer.layers.2.4.ff.0.0.weight": 4194304,
"transformer.layers.2.4.ff.0.0.bias": 4096,
"transformer.layers.2.4.ff.2.weight": 4194304,
"transformer.layers.2.4.ff.2.bias": 1024,
"transformer.layers.3.1.g": 1024,
"transformer.layers.3.2.to_q.weight": 1048576,
"transformer.layers.3.2.to_q.bias": 1024,
"transformer.layers.3.2.to_k.weight": 1048576,
"transformer.layers.3.2.to_k.bias": 1024,
"transformer.layers.3.2.to_v.weight": 1048576,
"transformer.layers.3.2.to_v.bias": 1024,
"transformer.layers.3.2.to_out.0.weight": 1048576,
"transformer.layers.3.2.to_out.0.bias": 1024,
"transformer.layers.3.3.g": 1024,
"transformer.layers.3.4.ff.0.0.weight": 4194304,
"transformer.layers.3.4.ff.0.0.bias": 4096,
"transformer.layers.3.4.ff.2.weight": 4194304,
"transformer.layers.3.4.ff.2.bias": 1024,
"transformer.layers.4.1.g": 1024,
"transformer.layers.4.2.to_q.weight": 1048576,
"transformer.layers.4.2.to_q.bias": 1024,
"transformer.layers.4.2.to_k.weight": 1048576,
"transformer.layers.4.2.to_k.bias": 1024,
"transformer.layers.4.2.to_v.weight": 1048576,
"transformer.layers.4.2.to_v.bias": 1024,
"transformer.layers.4.2.to_out.0.weight": 1048576,
"transformer.layers.4.2.to_out.0.bias": 1024,
"transformer.layers.4.3.g": 1024,
"transformer.layers.4.4.ff.0.0.weight": 4194304,
"transformer.layers.4.4.ff.0.0.bias": 4096,
"transformer.layers.4.4.ff.2.weight": 4194304,
"transformer.layers.4.4.ff.2.bias": 1024,
"transformer.layers.5.1.g": 1024,
"transformer.layers.5.2.to_q.weight": 1048576,
"transformer.layers.5.2.to_q.bias": 1024,
"transformer.layers.5.2.to_k.weight": 1048576,
"transformer.layers.5.2.to_k.bias": 1024,
"transformer.layers.5.2.to_v.weight": 1048576,
"transformer.layers.5.2.to_v.bias": 1024,
"transformer.layers.5.2.to_out.0.weight": 1048576,
"transformer.layers.5.2.to_out.0.bias": 1024,
"transformer.layers.5.3.g": 1024,
"transformer.layers.5.4.ff.0.0.weight": 4194304,
"transformer.layers.5.4.ff.0.0.bias": 4096,
"transformer.layers.5.4.ff.2.weight": 4194304,
"transformer.layers.5.4.ff.2.bias": 1024,
"transformer.layers.6.1.g": 1024,
"transformer.layers.6.2.to_q.weight": 1048576,
"transformer.layers.6.2.to_q.bias": 1024,
"transformer.layers.6.2.to_k.weight": 1048576,
"transformer.layers.6.2.to_k.bias": 1024,
"transformer.layers.6.2.to_v.weight": 1048576,
"transformer.layers.6.2.to_v.bias": 1024,
"transformer.layers.6.2.to_out.0.weight": 1048576,
"transformer.layers.6.2.to_out.0.bias": 1024,
"transformer.layers.6.3.g": 1024,
"transformer.layers.6.4.ff.0.0.weight": 4194304,
"transformer.layers.6.4.ff.0.0.bias": 4096,
"transformer.layers.6.4.ff.2.weight": 4194304,
"transformer.layers.6.4.ff.2.bias": 1024,
"transformer.layers.7.1.g": 1024,
"transformer.layers.7.2.to_q.weight": 1048576,
"transformer.layers.7.2.to_q.bias": 1024,
"transformer.layers.7.2.to_k.weight": 1048576,
"transformer.layers.7.2.to_k.bias": 1024,
"transformer.layers.7.2.to_v.weight": 1048576,
"transformer.layers.7.2.to_v.bias": 1024,
"transformer.layers.7.2.to_out.0.weight": 1048576,
"transformer.layers.7.2.to_out.0.bias": 1024,
"transformer.layers.7.3.g": 1024,
"transformer.layers.7.4.ff.0.0.weight": 4194304,
"transformer.layers.7.4.ff.0.0.bias": 4096,
"transformer.layers.7.4.ff.2.weight": 4194304,
"transformer.layers.7.4.ff.2.bias": 1024,
"transformer.layers.8.1.g": 1024,
"transformer.layers.8.2.to_q.weight": 1048576,
"transformer.layers.8.2.to_q.bias": 1024,
"transformer.layers.8.2.to_k.weight": 1048576,
"transformer.layers.8.2.to_k.bias": 1024,
"transformer.layers.8.2.to_v.weight": 1048576,
"transformer.layers.8.2.to_v.bias": 1024,
"transformer.layers.8.2.to_out.0.weight": 1048576,
"transformer.layers.8.2.to_out.0.bias": 1024,
"transformer.layers.8.3.g": 1024,
"transformer.layers.8.4.ff.0.0.weight": 4194304,
"transformer.layers.8.4.ff.0.0.bias": 4096,
"transformer.layers.8.4.ff.2.weight": 4194304,
"transformer.layers.8.4.ff.2.bias": 1024,
"transformer.layers.9.1.g": 1024,
"transformer.layers.9.2.to_q.weight": 1048576,
"transformer.layers.9.2.to_q.bias": 1024,
"transformer.layers.9.2.to_k.weight": 1048576,
"transformer.layers.9.2.to_k.bias": 1024,
"transformer.layers.9.2.to_v.weight": 1048576,
"transformer.layers.9.2.to_v.bias": 1024,
"transformer.layers.9.2.to_out.0.weight": 1048576,
"transformer.layers.9.2.to_out.0.bias": 1024,
"transformer.layers.9.3.g": 1024,
"transformer.layers.9.4.ff.0.0.weight": 4194304,
"transformer.layers.9.4.ff.0.0.bias": 4096,
"transformer.layers.9.4.ff.2.weight": 4194304,
"transformer.layers.9.4.ff.2.bias": 1024,
"transformer.layers.10.1.g": 1024,
"transformer.layers.10.2.to_q.weight": 1048576,
"transformer.layers.10.2.to_q.bias": 1024,
"transformer.layers.10.2.to_k.weight": 1048576,
"transformer.layers.10.2.to_k.bias": 1024,
"transformer.layers.10.2.to_v.weight": 1048576,
"transformer.layers.10.2.to_v.bias": 1024,
"transformer.layers.10.2.to_out.0.weight": 1048576,
"transformer.layers.10.2.to_out.0.bias": 1024,
"transformer.layers.10.3.g": 1024,
"transformer.layers.10.4.ff.0.0.weight": 4194304,
"transformer.layers.10.4.ff.0.0.bias": 4096,
"transformer.layers.10.4.ff.2.weight": 4194304,
"transformer.layers.10.4.ff.2.bias": 1024,
"transformer.layers.11.1.g": 1024,
"transformer.layers.11.2.to_q.weight": 1048576,
"transformer.layers.11.2.to_q.bias": 1024,
"transformer.layers.11.2.to_k.weight": 1048576,
"transformer.layers.11.2.to_k.bias": 1024,
"transformer.layers.11.2.to_v.weight": 1048576,
"transformer.layers.11.2.to_v.bias": 1024,
"transformer.layers.11.2.to_out.0.weight": 1048576,
"transformer.layers.11.2.to_out.0.bias": 1024,
"transformer.layers.11.3.g": 1024,
"transformer.layers.11.4.ff.0.0.weight": 4194304,
"transformer.layers.11.4.ff.0.0.bias": 4096,
"transformer.layers.11.4.ff.2.weight": 4194304,
"transformer.layers.11.4.ff.2.bias": 1024,
"transformer.layers.12.1.g": 1024,
"transformer.layers.12.2.to_q.weight": 1048576,
"transformer.layers.12.2.to_q.bias": 1024,
"transformer.layers.12.2.to_k.weight": 1048576,
"transformer.layers.12.2.to_k.bias": 1024,
"transformer.layers.12.2.to_v.weight": 1048576,
"transformer.layers.12.2.to_v.bias": 1024,
"transformer.layers.12.2.to_out.0.weight": 1048576,
"transformer.layers.12.2.to_out.0.bias": 1024,
"transformer.layers.12.3.g": 1024,
"transformer.layers.12.4.ff.0.0.weight": 4194304,
"transformer.layers.12.4.ff.0.0.bias": 4096,
"transformer.layers.12.4.ff.2.weight": 4194304,
"transformer.layers.12.4.ff.2.bias": 1024,
"transformer.layers.13.0.weight": 2097152,
"transformer.layers.13.1.g": 1024,
"transformer.layers.13.2.to_q.weight": 1048576,
"transformer.layers.13.2.to_q.bias": 1024,
"transformer.layers.13.2.to_k.weight": 1048576,
"transformer.layers.13.2.to_k.bias": 1024,
"transformer.layers.13.2.to_v.weight": 1048576,
"transformer.layers.13.2.to_v.bias": 1024,
"transformer.layers.13.2.to_out.0.weight": 1048576,
"transformer.layers.13.2.to_out.0.bias": 1024,
"transformer.layers.13.3.g": 1024,
"transformer.layers.13.4.ff.0.0.weight": 4194304,
"transformer.layers.13.4.ff.0.0.bias": 4096,
"transformer.layers.13.4.ff.2.weight": 4194304,
"transformer.layers.13.4.ff.2.bias": 1024,
"transformer.layers.14.0.weight": 2097152,
"transformer.layers.14.1.g": 1024,
"transformer.layers.14.2.to_q.weight": 1048576,
"transformer.layers.14.2.to_q.bias": 1024,
"transformer.layers.14.2.to_k.weight": 1048576,
"transformer.layers.14.2.to_k.bias": 1024,
"transformer.layers.14.2.to_v.weight": 1048576,
"transformer.layers.14.2.to_v.bias": 1024,
"transformer.layers.14.2.to_out.0.weight": 1048576,
"transformer.layers.14.2.to_out.0.bias": 1024,
"transformer.layers.14.3.g": 1024,
"transformer.layers.14.4.ff.0.0.weight": 4194304,
"transformer.layers.14.4.ff.0.0.bias": 4096,
"transformer.layers.14.4.ff.2.weight": 4194304,
"transformer.layers.14.4.ff.2.bias": 1024,
"transformer.layers.15.0.weight": 2097152,
"transformer.layers.15.1.g": 1024,
"transformer.layers.15.2.to_q.weight": 1048576,
"transformer.layers.15.2.to_q.bias": 1024,
"transformer.layers.15.2.to_k.weight": 1048576,
"transformer.layers.15.2.to_k.bias": 1024,
"transformer.layers.15.2.to_v.weight": 1048576,
"transformer.layers.15.2.to_v.bias": 1024,
"transformer.layers.15.2.to_out.0.weight": 1048576,
"transformer.layers.15.2.to_out.0.bias": 1024,
"transformer.layers.15.3.g": 1024,
"transformer.layers.15.4.ff.0.0.weight": 4194304,
"transformer.layers.15.4.ff.0.0.bias": 4096,
"transformer.layers.15.4.ff.2.weight": 4194304,
"transformer.layers.15.4.ff.2.bias": 1024,
"transformer.layers.16.0.weight": 2097152,
"transformer.layers.16.1.g": 1024,
"transformer.layers.16.2.to_q.weight": 1048576,
"transformer.layers.16.2.to_q.bias": 1024,
"transformer.layers.16.2.to_k.weight": 1048576,
"transformer.layers.16.2.to_k.bias": 1024,
"transformer.layers.16.2.to_v.weight": 1048576,
"transformer.layers.16.2.to_v.bias": 1024,
"transformer.layers.16.2.to_out.0.weight": 1048576,
"transformer.layers.16.2.to_out.0.bias": 1024,
"transformer.layers.16.3.g": 1024,
"transformer.layers.16.4.ff.0.0.weight": 4194304,
"transformer.layers.16.4.ff.0.0.bias": 4096,
"transformer.layers.16.4.ff.2.weight": 4194304,
"transformer.layers.16.4.ff.2.bias": 1024,
"transformer.layers.17.0.weight": 2097152,
"transformer.layers.17.1.g": 1024,
"transformer.layers.17.2.to_q.weight": 1048576,
"transformer.layers.17.2.to_q.bias": 1024,
"transformer.layers.17.2.to_k.weight": 1048576,
"transformer.layers.17.2.to_k.bias": 1024,
"transformer.layers.17.2.to_v.weight": 1048576,
"transformer.layers.17.2.to_v.bias": 1024,
"transformer.layers.17.2.to_out.0.weight": 1048576,
"transformer.layers.17.2.to_out.0.bias": 1024,
"transformer.layers.17.3.g": 1024,
"transformer.layers.17.4.ff.0.0.weight": 4194304,
"transformer.layers.17.4.ff.0.0.bias": 4096,
"transformer.layers.17.4.ff.2.weight": 4194304,
"transformer.layers.17.4.ff.2.bias": 1024,
"transformer.layers.18.0.weight": 2097152,
"transformer.layers.18.1.g": 1024,
"transformer.layers.18.2.to_q.weight": 1048576,
"transformer.layers.18.2.to_q.bias": 1024,
"transformer.layers.18.2.to_k.weight": 1048576,
"transformer.layers.18.2.to_k.bias": 1024,
"transformer.layers.18.2.to_v.weight": 1048576,
"transformer.layers.18.2.to_v.bias": 1024,
"transformer.layers.18.2.to_out.0.weight": 1048576,
"transformer.layers.18.2.to_out.0.bias": 1024,
"transformer.layers.18.3.g": 1024,
"transformer.layers.18.4.ff.0.0.weight": 4194304,
"transformer.layers.18.4.ff.0.0.bias": 4096,
"transformer.layers.18.4.ff.2.weight": 4194304,
"transformer.layers.18.4.ff.2.bias": 1024,
"transformer.layers.19.0.weight": 2097152,
"transformer.layers.19.1.g": 1024,
"transformer.layers.19.2.to_q.weight": 1048576,
"transformer.layers.19.2.to_q.bias": 1024,
"transformer.layers.19.2.to_k.weight": 1048576,
"transformer.layers.19.2.to_k.bias": 1024,
"transformer.layers.19.2.to_v.weight": 1048576,
"transformer.layers.19.2.to_v.bias": 1024,
"transformer.layers.19.2.to_out.0.weight": 1048576,
"transformer.layers.19.2.to_out.0.bias": 1024,
"transformer.layers.19.3.g": 1024,
"transformer.layers.19.4.ff.0.0.weight": 4194304,
"transformer.layers.19.4.ff.0.0.bias": 4096,
"transformer.layers.19.4.ff.2.weight": 4194304,
"transformer.layers.19.4.ff.2.bias": 1024,
"transformer.layers.20.0.weight": 2097152,
"transformer.layers.20.1.g": 1024,
"transformer.layers.20.2.to_q.weight": 1048576,
"transformer.layers.20.2.to_q.bias": 1024,
"transformer.layers.20.2.to_k.weight": 1048576,
"transformer.layers.20.2.to_k.bias": 1024,
"transformer.layers.20.2.to_v.weight": 1048576,
"transformer.layers.20.2.to_v.bias": 1024,
"transformer.layers.20.2.to_out.0.weight": 1048576,
"transformer.layers.20.2.to_out.0.bias": 1024,
"transformer.layers.20.3.g": 1024,
"transformer.layers.20.4.ff.0.0.weight": 4194304,
"transformer.layers.20.4.ff.0.0.bias": 4096,
"transformer.layers.20.4.ff.2.weight": 4194304,
"transformer.layers.20.4.ff.2.bias": 1024,
"transformer.layers.21.0.weight": 2097152,
"transformer.layers.21.1.g": 1024,
"transformer.layers.21.2.to_q.weight": 1048576,
"transformer.layers.21.2.to_q.bias": 1024,
"transformer.layers.21.2.to_k.weight": 1048576,
"transformer.layers.21.2.to_k.bias": 1024,
"transformer.layers.21.2.to_v.weight": 1048576,
"transformer.layers.21.2.to_v.bias": 1024,
"transformer.layers.21.2.to_out.0.weight": 1048576,
"transformer.layers.21.2.to_out.0.bias": 1024,
"transformer.layers.21.3.g": 1024,
"transformer.layers.21.4.ff.0.0.weight": 4194304,
"transformer.layers.21.4.ff.0.0.bias": 4096,
"transformer.layers.21.4.ff.2.weight": 4194304,
"transformer.layers.21.4.ff.2.bias": 1024,
"transformer.layers.22.0.weight": 2097152,
"transformer.layers.22.1.g": 1024,
"transformer.layers.22.2.to_q.weight": 1048576,
"transformer.layers.22.2.to_q.bias": 1024,
"transformer.layers.22.2.to_k.weight": 1048576,
"transformer.layers.22.2.to_k.bias": 1024,
"transformer.layers.22.2.to_v.weight": 1048576,
"transformer.layers.22.2.to_v.bias": 1024,
"transformer.layers.22.2.to_out.0.weight": 1048576,
"transformer.layers.22.2.to_out.0.bias": 1024,
"transformer.layers.22.3.g": 1024,
"transformer.layers.22.4.ff.0.0.weight": 4194304,
"transformer.layers.22.4.ff.0.0.bias": 4096,
"transformer.layers.22.4.ff.2.weight": 4194304,
"transformer.layers.22.4.ff.2.bias": 1024,
"transformer.layers.23.0.weight": 2097152,
"transformer.layers.23.1.g": 1024,
"transformer.layers.23.2.to_q.weight": 1048576,
"transformer.layers.23.2.to_q.bias": 1024,
"transformer.layers.23.2.to_k.weight": 1048576,
"transformer.layers.23.2.to_k.bias": 1024,
"transformer.layers.23.2.to_v.weight": 1048576,
"transformer.layers.23.2.to_v.bias": 1024,
"transformer.layers.23.2.to_out.0.weight": 1048576,
"transformer.layers.23.2.to_out.0.bias": 1024,
"transformer.layers.23.3.g": 1024,
"transformer.layers.23.4.ff.0.0.weight": 4194304,
"transformer.layers.23.4.ff.0.0.bias": 4096,
"transformer.layers.23.4.ff.2.weight": 4194304,
"transformer.layers.23.4.ff.2.bias": 1024,
"transformer.layers.24.0.weight": 2097152,
"transformer.layers.24.1.g": 1024,
"transformer.layers.24.2.to_q.weight": 1048576,
"transformer.layers.24.2.to_q.bias": 1024,
"transformer.layers.24.2.to_k.weight": 1048576,
"transformer.layers.24.2.to_k.bias": 1024,
"transformer.layers.24.2.to_v.weight": 1048576,
"transformer.layers.24.2.to_v.bias": 1024,
"transformer.layers.24.2.to_out.0.weight": 1048576,
"transformer.layers.24.2.to_out.0.bias": 1024,
"transformer.layers.24.3.g": 1024,
"transformer.layers.24.4.ff.0.0.weight": 4194304,
"transformer.layers.24.4.ff.0.0.bias": 4096,
"transformer.layers.24.4.ff.2.weight": 4194304,
"transformer.layers.24.4.ff.2.bias": 1024,
"transformer.layers.25.0.weight": 2097152,
"transformer.layers.25.1.g": 1024,
"transformer.layers.25.2.to_q.weight": 1048576,
"transformer.layers.25.2.to_q.bias": 1024,
"transformer.layers.25.2.to_k.weight": 1048576,
"transformer.layers.25.2.to_k.bias": 1024,
"transformer.layers.25.2.to_v.weight": 1048576,
"transformer.layers.25.2.to_v.bias": 1024,
"transformer.layers.25.2.to_out.0.weight": 1048576,
"transformer.layers.25.2.to_out.0.bias": 1024,
"transformer.layers.25.3.g": 1024,
"transformer.layers.25.4.ff.0.0.weight": 4194304,
"transformer.layers.25.4.ff.0.0.bias": 4096,
"transformer.layers.25.4.ff.2.weight": 4194304,
"transformer.layers.25.4.ff.2.bias": 1024,
"transformer.norm_out.g": 1024,
"transformer.proj_out.weight": 102400,
"transformer.proj_out.bias": 100
},
"important_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
],
"bottleneck_layers": [],
"recommendations": {
"focus_layers": [
"transformer.time_embed.time_mlp.0.weight",
"transformer.time_embed.time_mlp.2.weight",
"transformer.text_embed.text_embed.weight",
"transformer.input_embed.proj.weight",
"transformer.input_embed.conv_pos_embed.conv1d.0.weight",
"transformer.input_embed.conv_pos_embed.conv1d.2.weight",
"transformer.layers.0.2.to_q.weight",
"transformer.layers.0.2.to_k.weight",
"transformer.layers.0.2.to_v.weight",
"transformer.layers.0.2.to_out.0.weight",
"transformer.layers.0.4.ff.0.0.weight",
"transformer.layers.0.4.ff.2.weight",
"transformer.layers.1.2.to_q.weight",
"transformer.layers.1.2.to_k.weight",
"transformer.layers.1.2.to_v.weight",
"transformer.layers.1.2.to_out.0.weight",
"transformer.layers.1.4.ff.0.0.weight",
"transformer.layers.1.4.ff.2.weight",
"transformer.layers.2.2.to_q.weight",
"transformer.layers.2.2.to_k.weight",
"transformer.layers.2.2.to_v.weight",
"transformer.layers.2.2.to_out.0.weight",
"transformer.layers.2.4.ff.0.0.weight",
"transformer.layers.2.4.ff.2.weight",
"transformer.layers.3.2.to_q.weight",
"transformer.layers.3.2.to_k.weight",
"transformer.layers.3.2.to_v.weight",
"transformer.layers.3.2.to_out.0.weight",
"transformer.layers.3.4.ff.0.0.weight",
"transformer.layers.3.4.ff.2.weight",
"transformer.layers.4.2.to_q.weight",
"transformer.layers.4.2.to_k.weight",
"transformer.layers.4.2.to_v.weight",
"transformer.layers.4.2.to_out.0.weight",
"transformer.layers.4.4.ff.0.0.weight",
"transformer.layers.4.4.ff.2.weight",
"transformer.layers.5.2.to_q.weight",
"transformer.layers.5.2.to_k.weight",
"transformer.layers.5.2.to_v.weight",
"transformer.layers.5.2.to_out.0.weight",
"transformer.layers.5.4.ff.0.0.weight",
"transformer.layers.5.4.ff.2.weight",
"transformer.layers.6.2.to_q.weight",
"transformer.layers.6.2.to_k.weight",
"transformer.layers.6.2.to_v.weight",
"transformer.layers.6.2.to_out.0.weight",
"transformer.layers.6.4.ff.0.0.weight",
"transformer.layers.6.4.ff.2.weight",
"transformer.layers.7.2.to_q.weight",
"transformer.layers.7.2.to_k.weight",
"transformer.layers.7.2.to_v.weight",
"transformer.layers.7.2.to_out.0.weight",
"transformer.layers.7.4.ff.0.0.weight",
"transformer.layers.7.4.ff.2.weight",
"transformer.layers.8.4.ff.0.0.weight",
"transformer.layers.8.4.ff.2.weight",
"transformer.layers.9.4.ff.0.0.weight",
"transformer.layers.9.4.ff.2.weight",
"transformer.layers.10.4.ff.0.0.weight",
"transformer.layers.10.4.ff.2.weight",
"transformer.layers.11.4.ff.0.0.weight",
"transformer.layers.11.4.ff.2.weight",
"transformer.layers.12.4.ff.0.0.weight",
"transformer.layers.12.4.ff.2.weight",
"transformer.layers.13.0.weight",
"transformer.layers.13.4.ff.0.0.weight",
"transformer.layers.13.4.ff.2.weight",
"transformer.layers.14.0.weight",
"transformer.layers.14.4.ff.0.0.weight",
"transformer.layers.14.4.ff.2.weight",
"transformer.layers.15.0.weight",
"transformer.layers.15.4.ff.0.0.weight",
"transformer.layers.15.4.ff.2.weight",
"transformer.layers.16.4.ff.0.0.weight",
"transformer.layers.16.4.ff.2.weight",
"transformer.layers.17.4.ff.0.0.weight",
"transformer.layers.17.4.ff.2.weight",
"transformer.layers.18.4.ff.0.0.weight",
"transformer.layers.18.4.ff.2.weight",
"transformer.layers.19.4.ff.0.0.weight",
"transformer.layers.19.4.ff.2.weight",
"transformer.layers.20.4.ff.0.0.weight",
"transformer.layers.20.4.ff.2.weight",
"transformer.layers.21.4.ff.0.0.weight",
"transformer.layers.21.4.ff.2.weight",
"transformer.layers.22.4.ff.0.0.weight",
"transformer.layers.22.4.ff.2.weight",
"transformer.layers.23.4.ff.0.0.weight",
"transformer.layers.23.4.ff.2.weight",
"transformer.layers.24.4.ff.0.0.weight",
"transformer.layers.24.4.ff.2.weight",
"transformer.layers.25.4.ff.0.0.weight",
"transformer.layers.25.4.ff.2.weight"
]
},
"total_parameters": 391,
"total_elements": 360755948,
"param_ranges": {
"transformer.time_embed.time_mlp.0.weight": {
"min": -0.43036678433418274,
"max": 0.2982814610004425,
"mean": -0.0025639168452471495,
"std": 0.04256023094058037,
"sparsity": 0.0,
"shape": [
1024,
256
]
},
"transformer.time_embed.time_mlp.0.bias": {
"min": -0.06307890266180038,
"max": 0.10733882337808609,
"mean": 0.000591748976148665,
"std": 0.034078747034072876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.time_embed.time_mlp.2.weight": {
"min": -0.41281235218048096,
"max": 0.8368205428123474,
"mean": -0.00020580022828653455,
"std": 0.02411011978983879,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.time_embed.time_mlp.2.bias": {
"min": -0.11508890986442566,
"max": 0.3209010660648346,
"mean": -0.0009312849142588675,
"std": 0.01954229176044464,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.text_embed.text_embed.weight": {
"min": -2.7886247634887695,
"max": 2.8676700592041016,
"mean": -0.0003673843457363546,
"std": 0.6154846549034119,
"sparsity": 0.0,
"shape": [
2546,
100
]
},
"transformer.input_embed.proj.weight": {
"min": -0.27876999974250793,
"max": 0.3816433846950531,
"mean": 0.00041971245082095265,
"std": 0.0427577942609787,
"sparsity": 0.0,
"shape": [
1024,
300
]
},
"transformer.input_embed.proj.bias": {
"min": -0.22179193794727325,
"max": 0.20910178124904633,
"mean": -0.00449436716735363,
"std": 0.0408766008913517,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.weight": {
"min": -0.4284907877445221,
"max": 0.4762955904006958,
"mean": 1.3556076510212733e-06,
"std": 0.024511976167559624,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.0.bias": {
"min": -0.32450857758522034,
"max": 0.15602749586105347,
"mean": -0.04666242375969887,
"std": 0.05150512233376503,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.weight": {
"min": -0.4105537235736847,
"max": 0.35443225502967834,
"mean": -0.00012739744852297008,
"std": 0.023602385073900223,
"sparsity": 0.0,
"shape": [
1024,
64,
31
]
},
"transformer.input_embed.conv_pos_embed.conv1d.2.bias": {
"min": -0.22917909920215607,
"max": 0.2621273994445801,
"mean": -0.029117178171873093,
"std": 0.049283698201179504,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.1.g": {
"min": 0.2544216215610504,
"max": 0.8185670971870422,
"mean": 0.5252723693847656,
"std": 0.08049405366182327,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_q.weight": {
"min": -0.2967362403869629,
"max": 0.26540544629096985,
"mean": -0.0004257934633642435,
"std": 0.032104942947626114,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_q.bias": {
"min": -0.09282971918582916,
"max": 0.12431935220956802,
"mean": 0.000645699561573565,
"std": 0.02571764960885048,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_k.weight": {
"min": -0.2909117043018341,
"max": 0.28097161650657654,
"mean": -7.593112241011113e-05,
"std": 0.030932165682315826,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_k.bias": {
"min": -5.890472888946533,
"max": 5.805418491363525,
"mean": -0.009322225116193295,
"std": 1.2942466735839844,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_v.weight": {
"min": -0.42496761679649353,
"max": 0.3436029851436615,
"mean": 9.743953705765307e-05,
"std": 0.029953880235552788,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_v.bias": {
"min": -0.028933702036738396,
"max": 0.027695059776306152,
"mean": -0.00032178848050534725,
"std": 0.012570273131132126,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.2.to_out.0.weight": {
"min": -0.45337191224098206,
"max": 0.44843629002571106,
"mean": 2.4102073439280502e-05,
"std": 0.023851700127124786,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.0.2.to_out.0.bias": {
"min": -0.08852554112672806,
"max": 0.09096554666757584,
"mean": 0.0022833123803138733,
"std": 0.01949877291917801,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.3.g": {
"min": 0.2666127681732178,
"max": 1.0543620586395264,
"mean": 0.5309467911720276,
"std": 0.10404026508331299,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.0.4.ff.0.0.weight": {
"min": -0.5751341581344604,
"max": 0.6088229417800903,
"mean": -0.0004320710140746087,
"std": 0.0386008694767952,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.0.4.ff.0.0.bias": {
"min": -0.18247970938682556,
"max": 0.04547928646206856,
"mean": -0.029448386281728745,
"std": 0.04255641624331474,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.0.4.ff.2.weight": {
"min": -1.166790246963501,
"max": 1.6334140300750732,
"mean": 0.00032607169123366475,
"std": 0.02769557386636734,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.0.4.ff.2.bias": {
"min": -0.16213519871234894,
"max": 0.2053978145122528,
"mean": -0.021131210029125214,
"std": 0.02792428247630596,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.1.g": {
"min": 0.22390854358673096,
"max": 0.8422228693962097,
"mean": 0.4874723255634308,
"std": 0.0749419778585434,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_q.weight": {
"min": -0.2551497519016266,
"max": 0.3057706952095032,
"mean": -7.631031621713191e-06,
"std": 0.03347672149538994,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_q.bias": {
"min": -0.09526324272155762,
"max": 0.11054196208715439,
"mean": 5.9016994782723486e-05,
"std": 0.026952214539051056,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_k.weight": {
"min": -0.29700320959091187,
"max": 0.29560279846191406,
"mean": 5.1945076847914606e-05,
"std": 0.03254617378115654,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_k.bias": {
"min": -5.157034397125244,
"max": 5.077272891998291,
"mean": -0.014557666145265102,
"std": 1.1561598777770996,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_v.weight": {
"min": -0.34469008445739746,
"max": 0.3430800437927246,
"mean": 7.922034274088219e-05,
"std": 0.03006283938884735,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_v.bias": {
"min": -0.03611171245574951,
"max": 0.03316429257392883,
"mean": -0.00014332182763610035,
"std": 0.013021831400692463,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.2.to_out.0.weight": {
"min": -0.3155629634857178,
"max": 0.3745230734348297,
"mean": -2.0780769773409702e-05,
"std": 0.024060120806097984,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.1.2.to_out.0.bias": {
"min": -0.10523121803998947,
"max": 0.12181323021650314,
"mean": -0.0019697900861501694,
"std": 0.028833730146288872,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.3.g": {
"min": 0.31127864122390747,
"max": 1.118981957435608,
"mean": 0.6661038398742676,
"std": 0.09739536792039871,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.1.4.ff.0.0.weight": {
"min": -0.8734181523323059,
"max": 0.6272271275520325,
"mean": 0.0016762978630140424,
"std": 0.04744264855980873,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.1.4.ff.0.0.bias": {
"min": -0.27110713720321655,
"max": 0.03433133661746979,
"mean": -0.04661067947745323,
"std": 0.04056624323129654,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.1.4.ff.2.weight": {
"min": -0.9215274453163147,
"max": 0.9644713997840881,
"mean": 0.0010202918201684952,
"std": 0.0407060943543911,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.1.4.ff.2.bias": {
"min": -0.1444365382194519,
"max": 0.07489711046218872,
"mean": -0.00908645335584879,
"std": 0.02568359486758709,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.1.g": {
"min": 0.23954921960830688,
"max": 0.7114554047584534,
"mean": 0.44711926579475403,
"std": 0.059072595089673996,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_q.weight": {
"min": -0.27211347222328186,
"max": 0.29757410287857056,
"mean": 9.160639820038341e-06,
"std": 0.03547541797161102,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_q.bias": {
"min": -0.11930356919765472,
"max": 0.1185561791062355,
"mean": 0.0007570894667878747,
"std": 0.027588583528995514,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_k.weight": {
"min": -0.2805509567260742,
"max": 0.2793390452861786,
"mean": -7.711815123911947e-05,
"std": 0.03510286659002304,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_k.bias": {
"min": -2.5059573650360107,
"max": 2.5179529190063477,
"mean": 0.02672126702964306,
"std": 0.5862834453582764,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_v.weight": {
"min": -0.22094596922397614,
"max": 0.27129310369491577,
"mean": 2.4950504666776396e-06,
"std": 0.030734829604625702,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_v.bias": {
"min": -0.03352592885494232,
"max": 0.03140881285071373,
"mean": 0.00011744203220587224,
"std": 0.012399573810398579,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.2.to_out.0.weight": {
"min": -0.23510752618312836,
"max": 0.23160243034362793,
"mean": 5.7065204600803554e-05,
"std": 0.02570049650967121,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.2.2.to_out.0.bias": {
"min": -0.13554446399211884,
"max": 0.1277279406785965,
"mean": -0.005496564321219921,
"std": 0.039924751967191696,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.3.g": {
"min": 0.3543228507041931,
"max": 1.169933795928955,
"mean": 0.7103918194770813,
"std": 0.10339365899562836,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.2.4.ff.0.0.weight": {
"min": -0.6172032952308655,
"max": 0.5551565885543823,
"mean": 0.0011604262981563807,
"std": 0.04612047225236893,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.2.4.ff.0.0.bias": {
"min": -0.18880973756313324,
"max": 0.02472936362028122,
"mean": -0.034827686846256256,
"std": 0.028596267104148865,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.2.4.ff.2.weight": {
"min": -1.1306864023208618,
"max": 0.9699204564094543,
"mean": 0.00035697812563739717,
"std": 0.0423479862511158,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.2.4.ff.2.bias": {
"min": -0.5971187949180603,
"max": 0.06284646689891815,
"mean": -0.00487535959109664,
"std": 0.028591454029083252,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.1.g": {
"min": 0.37525925040245056,
"max": 0.938994288444519,
"mean": 0.5923536419868469,
"std": 0.06656986474990845,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_q.weight": {
"min": -0.3912387192249298,
"max": 0.3688672184944153,
"mean": 7.05350175849162e-05,
"std": 0.03718964010477066,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_q.bias": {
"min": -0.11892075091600418,
"max": 0.13641902804374695,
"mean": 0.0009228037670254707,
"std": 0.029190916568040848,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_k.weight": {
"min": -0.6183786392211914,
"max": 0.5081523060798645,
"mean": 1.5137170521484222e-05,
"std": 0.036442697048187256,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_k.bias": {
"min": -8.175475120544434,
"max": 8.77673053741455,
"mean": -0.10916879773139954,
"std": 1.6969348192214966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_v.weight": {
"min": -0.27656111121177673,
"max": 0.23974747955799103,
"mean": 5.267578671919182e-05,
"std": 0.03261591121554375,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_v.bias": {
"min": -0.051889754831790924,
"max": 0.03952917456626892,
"mean": 9.714082989376038e-05,
"std": 0.012956415303051472,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.2.to_out.0.weight": {
"min": -0.23051224648952484,
"max": 0.23422203958034515,
"mean": -2.1783589545520954e-05,
"std": 0.029392505064606667,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.3.2.to_out.0.bias": {
"min": -0.20386114716529846,
"max": 0.105349101126194,
"mean": -0.004017278086394072,
"std": 0.032608963549137115,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.3.g": {
"min": 0.3398902118206024,
"max": 1.0104986429214478,
"mean": 0.7006295919418335,
"std": 0.09645849466323853,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.3.4.ff.0.0.weight": {
"min": -0.5644850134849548,
"max": 0.8330016136169434,
"mean": 0.0004154921043664217,
"std": 0.04230193421244621,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.3.4.ff.0.0.bias": {
"min": -0.21176111698150635,
"max": 0.030274739488959312,
"mean": -0.03216158226132393,
"std": 0.02647627517580986,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.3.4.ff.2.weight": {
"min": -0.7536418437957764,
"max": 0.7178125381469727,
"mean": -1.392904141539475e-05,
"std": 0.03684176877140999,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.3.4.ff.2.bias": {
"min": -0.2630210220813751,
"max": 0.10589547455310822,
"mean": -0.0030209862161427736,
"std": 0.028848819434642792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.1.g": {
"min": 0.2840619385242462,
"max": 0.6940633654594421,
"mean": 0.4993802607059479,
"std": 0.04630398005247116,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_q.weight": {
"min": -0.27834540605545044,
"max": 0.23377880454063416,
"mean": -0.00011083983554271981,
"std": 0.03876272216439247,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_q.bias": {
"min": -0.15375865995883942,
"max": 0.12639263272285461,
"mean": -0.002223189687356353,
"std": 0.03333896026015282,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_k.weight": {
"min": -0.413473516702652,
"max": 0.6594987511634827,
"mean": -1.9574425095925108e-05,
"std": 0.039102163165807724,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_k.bias": {
"min": -4.232054233551025,
"max": 4.715608596801758,
"mean": -0.020489608868956566,
"std": 1.0068248510360718,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_v.weight": {
"min": -0.24494825303554535,
"max": 0.20708487927913666,
"mean": 4.434686343302019e-05,
"std": 0.03396739438176155,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_v.bias": {
"min": -0.034493304789066315,
"max": 0.04486649110913277,
"mean": -2.654863055795431e-05,
"std": 0.012638254091143608,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.2.to_out.0.weight": {
"min": -0.2005356252193451,
"max": 0.2055814564228058,
"mean": -3.0033888833713718e-05,
"std": 0.031025094911456108,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.4.2.to_out.0.bias": {
"min": -0.19959698617458344,
"max": 0.11300574988126755,
"mean": -0.002902751788496971,
"std": 0.03449735790491104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.3.g": {
"min": 0.3668424189090729,
"max": 1.05502188205719,
"mean": 0.6704874634742737,
"std": 0.06617505103349686,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.4.4.ff.0.0.weight": {
"min": -0.3976363241672516,
"max": 0.5017815828323364,
"mean": -3.87727704946883e-05,
"std": 0.041137050837278366,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.4.4.ff.0.0.bias": {
"min": -0.12772123515605927,
"max": 0.026762252673506737,
"mean": -0.03051420859992504,
"std": 0.021863147616386414,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.4.4.ff.2.weight": {
"min": -0.44920089840888977,
"max": 0.4333121180534363,
"mean": 7.599063974339515e-05,
"std": 0.034896738827228546,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.4.4.ff.2.bias": {
"min": -0.2671979069709778,
"max": 0.07298687100410461,
"mean": -0.0010975392069667578,
"std": 0.023116325959563255,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.1.g": {
"min": 0.28697913885116577,
"max": 0.6839067339897156,
"mean": 0.5244333744049072,
"std": 0.047293804585933685,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_q.weight": {
"min": -0.22255805134773254,
"max": 0.22290681302547455,
"mean": 1.621080627955962e-05,
"std": 0.03895403817296028,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_q.bias": {
"min": -0.13629747927188873,
"max": 0.109336718916893,
"mean": 0.0002461877593304962,
"std": 0.02917083166539669,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_k.weight": {
"min": -0.3738900125026703,
"max": 0.43744465708732605,
"mean": -9.668656275607646e-06,
"std": 0.03929208964109421,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_k.bias": {
"min": -3.840332269668579,
"max": 4.992400646209717,
"mean": 0.009748304262757301,
"std": 0.8444803953170776,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_v.weight": {
"min": -0.22292070090770721,
"max": 0.21977820992469788,
"mean": -4.448638719622977e-07,
"std": 0.03441440686583519,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_v.bias": {
"min": -0.04357949644327164,
"max": 0.03590534254908562,
"mean": -0.000258232990745455,
"std": 0.012078864499926567,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.2.to_out.0.weight": {
"min": -0.21297886967658997,
"max": 0.18814441561698914,
"mean": -1.71422834682744e-05,
"std": 0.031540658324956894,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.5.2.to_out.0.bias": {
"min": -0.1805071383714676,
"max": 0.12073972076177597,
"mean": -0.00239769509062171,
"std": 0.04125608131289482,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.3.g": {
"min": 0.4227307438850403,
"max": 0.9400621056556702,
"mean": 0.662601888179779,
"std": 0.056538671255111694,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.5.4.ff.0.0.weight": {
"min": -0.37151503562927246,
"max": 0.4761146008968353,
"mean": -8.195374539354816e-05,
"std": 0.040896203368902206,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.5.4.ff.0.0.bias": {
"min": -0.20797580480575562,
"max": 0.027151037007570267,
"mean": -0.030222713947296143,
"std": 0.021336952224373817,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.5.4.ff.2.weight": {
"min": -0.33968234062194824,
"max": 0.7333835959434509,
"mean": 8.077031816355884e-05,
"std": 0.034772153943777084,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.5.4.ff.2.bias": {
"min": -0.23987196385860443,
"max": 0.05037139728665352,
"mean": -0.0011877692304551601,
"std": 0.020454443991184235,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.1.g": {
"min": 0.30607396364212036,
"max": 0.652435839176178,
"mean": 0.5250428915023804,
"std": 0.04590361937880516,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_q.weight": {
"min": -0.3039066791534424,
"max": 0.21754606068134308,
"mean": 7.030011329334229e-05,
"std": 0.03950100764632225,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_q.bias": {
"min": -0.14914348721504211,
"max": 0.13110090792179108,
"mean": 0.00035085732815787196,
"std": 0.030418941751122475,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_k.weight": {
"min": -0.2568054795265198,
"max": 0.20193904638290405,
"mean": 3.147923416690901e-05,
"std": 0.03949080780148506,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_k.bias": {
"min": -2.3329901695251465,
"max": 2.3725619316101074,
"mean": -0.02622254565358162,
"std": 0.4494195282459259,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_v.weight": {
"min": -0.18853308260440826,
"max": 0.2103482335805893,
"mean": 3.745816502487287e-05,
"std": 0.03479913994669914,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_v.bias": {
"min": -0.03156094253063202,
"max": 0.035385265946388245,
"mean": -0.0001973491598619148,
"std": 0.012292337603867054,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.2.to_out.0.weight": {
"min": -0.1882481426000595,
"max": 0.17012155055999756,
"mean": -6.810311606386676e-05,
"std": 0.03217574581503868,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.6.2.to_out.0.bias": {
"min": -0.13900111615657806,
"max": 0.13692621886730194,
"mean": -0.002514890395104885,
"std": 0.051281191408634186,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.3.g": {
"min": 0.46707433462142944,
"max": 0.9541991353034973,
"mean": 0.6688030958175659,
"std": 0.052486222237348557,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.6.4.ff.0.0.weight": {
"min": -0.32425403594970703,
"max": 0.30980852246284485,
"mean": -1.290425643674098e-06,
"std": 0.040951915085315704,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.6.4.ff.0.0.bias": {
"min": -0.12465585768222809,
"max": 0.02537902072072029,
"mean": -0.030681122094392776,
"std": 0.0198006983846426,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.6.4.ff.2.weight": {
"min": -0.43958571553230286,
"max": 0.44490763545036316,
"mean": 9.539163875160739e-05,
"std": 0.0351250097155571,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.6.4.ff.2.bias": {
"min": -0.2243558019399643,
"max": 0.0517578125,
"mean": -0.0011802279623225331,
"std": 0.018464019522070885,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.1.g": {
"min": 0.33896756172180176,
"max": 0.7381694912910461,
"mean": 0.5586157441139221,
"std": 0.04119841381907463,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_q.weight": {
"min": -0.27227938175201416,
"max": 0.27836883068084717,
"mean": 1.999387313844636e-05,
"std": 0.041062600910663605,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_q.bias": {
"min": -0.13660800457000732,
"max": 0.1392778903245926,
"mean": 0.0004841584013774991,
"std": 0.02658114954829216,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_k.weight": {
"min": -0.4896349310874939,
"max": 0.3551800847053528,
"mean": 8.872073522070423e-05,
"std": 0.04069973900914192,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_k.bias": {
"min": -2.293769121170044,
"max": 1.742555856704712,
"mean": -0.02106180600821972,
"std": 0.49974092841148376,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_v.weight": {
"min": -0.2175416797399521,
"max": 0.19781090319156647,
"mean": -4.052485746797174e-05,
"std": 0.03423763066530228,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_v.bias": {
"min": -0.04145532101392746,
"max": 0.038727227598428726,
"mean": -0.00013765225594397634,
"std": 0.012874336913228035,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.2.to_out.0.weight": {
"min": -0.177314892411232,
"max": 0.1832207590341568,
"mean": 4.75629567517899e-05,
"std": 0.03156043216586113,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.7.2.to_out.0.bias": {
"min": -0.1798381805419922,
"max": 0.18348462879657745,
"mean": -0.002212759107351303,
"std": 0.054820165038108826,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.3.g": {
"min": 0.4742435812950134,
"max": 1.0238897800445557,
"mean": 0.6451865434646606,
"std": 0.05008064582943916,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.7.4.ff.0.0.weight": {
"min": -0.2714613080024719,
"max": 0.3092961311340332,
"mean": 0.00011265614011790603,
"std": 0.04068758338689804,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.7.4.ff.0.0.bias": {
"min": -0.1055683121085167,
"max": 0.026772309094667435,
"mean": -0.029506118968129158,
"std": 0.017915068194270134,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.7.4.ff.2.weight": {
"min": -0.33880147337913513,
"max": 0.3287900686264038,
"mean": 5.556903124670498e-05,
"std": 0.03441847860813141,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.7.4.ff.2.bias": {
"min": -0.18144568800926208,
"max": 0.04239530488848686,
"mean": -0.001068950048647821,
"std": 0.017201630398631096,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.1.g": {
"min": 0.3253972828388214,
"max": 0.68559730052948,
"mean": 0.5111000537872314,
"std": 0.03672371804714203,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_q.weight": {
"min": -0.23373860120773315,
"max": 0.22572296857833862,
"mean": -3.580976772354916e-05,
"std": 0.039181455969810486,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_q.bias": {
"min": -0.11530666053295135,
"max": 0.1317266821861267,
"mean": 0.00015847355825826526,
"std": 0.029152128845453262,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_k.weight": {
"min": -0.3521575629711151,
"max": 0.2847552001476288,
"mean": 7.120977898011915e-06,
"std": 0.039250005036592484,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_k.bias": {
"min": -4.126590728759766,
"max": 3.538623332977295,
"mean": -0.01155401673167944,
"std": 0.6819069385528564,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_v.weight": {
"min": -0.21105211973190308,
"max": 0.20891818404197693,
"mean": 3.4748343750834465e-05,
"std": 0.03448968380689621,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_v.bias": {
"min": -0.03559347987174988,
"max": 0.04803197458386421,
"mean": 0.0007964627584442496,
"std": 0.012855397537350655,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.2.to_out.0.weight": {
"min": -0.21038679778575897,
"max": 0.1929050087928772,
"mean": -1.3255728390504373e-06,
"std": 0.0317002572119236,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.8.2.to_out.0.bias": {
"min": -0.18651214241981506,
"max": 0.17674075067043304,
"mean": -0.002840832807123661,
"std": 0.05859901383519173,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.3.g": {
"min": 0.4748058021068573,
"max": 1.0396208763122559,
"mean": 0.6513342261314392,
"std": 0.049332328140735626,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.8.4.ff.0.0.weight": {
"min": -0.2482759803533554,
"max": 0.3290877640247345,
"mean": 0.00018071771773975343,
"std": 0.04057670012116432,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.8.4.ff.0.0.bias": {
"min": -0.12517917156219482,
"max": 0.02484654076397419,
"mean": -0.030485937371850014,
"std": 0.017585651949048042,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.8.4.ff.2.weight": {
"min": -0.42004328966140747,
"max": 0.48050060868263245,
"mean": -1.1724823707481846e-06,
"std": 0.03540315851569176,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.8.4.ff.2.bias": {
"min": -0.15136678516864777,
"max": 0.04356072470545769,
"mean": 4.775111301569268e-05,
"std": 0.014870403334498405,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.1.g": {
"min": 0.3155934810638428,
"max": 0.6807596683502197,
"mean": 0.5528346300125122,
"std": 0.04051977023482323,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_q.weight": {
"min": -0.2063884735107422,
"max": 0.21910899877548218,
"mean": 3.103859489783645e-05,
"std": 0.038303472101688385,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_q.bias": {
"min": -0.13769029080867767,
"max": 0.1125277578830719,
"mean": 1.9220009562559426e-05,
"std": 0.02578623965382576,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_k.weight": {
"min": -0.40236374735832214,
"max": 0.37038296461105347,
"mean": 2.613713513710536e-05,
"std": 0.03818493336439133,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_k.bias": {
"min": -3.7654759883880615,
"max": 2.864607572555542,
"mean": 0.0011372193694114685,
"std": 0.51633220911026,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_v.weight": {
"min": -0.20273104310035706,
"max": 0.1974526047706604,
"mean": 2.9206170438556e-05,
"std": 0.034301165491342545,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_v.bias": {
"min": -0.05080447345972061,
"max": 0.0398997887969017,
"mean": -0.00042000875691883266,
"std": 0.013411123305559158,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.2.to_out.0.weight": {
"min": -0.19611378014087677,
"max": 0.20161780714988708,
"mean": -1.2710506780422293e-05,
"std": 0.03180883079767227,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.9.2.to_out.0.bias": {
"min": -0.19288454949855804,
"max": 0.1946749985218048,
"mean": -0.002961306367069483,
"std": 0.06252170354127884,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.3.g": {
"min": 0.3495900332927704,
"max": 1.0818731784820557,
"mean": 0.6670873165130615,
"std": 0.054898131638765335,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.9.4.ff.0.0.weight": {
"min": -0.22497375309467316,
"max": 0.25112366676330566,
"mean": 0.00035900043440051377,
"std": 0.04076608642935753,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.9.4.ff.0.0.bias": {
"min": -0.09095952659845352,
"max": 0.0440162755548954,
"mean": -0.030070394277572632,
"std": 0.017598489299416542,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.9.4.ff.2.weight": {
"min": -0.35297849774360657,
"max": 0.3037008047103882,
"mean": -4.511567021836527e-05,
"std": 0.03712863847613335,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.9.4.ff.2.bias": {
"min": -0.1615627110004425,
"max": 0.06344226002693176,
"mean": -7.402076153084636e-05,
"std": 0.019400237128138542,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.1.g": {
"min": 0.3484867811203003,
"max": 0.7205584049224854,
"mean": 0.5422928333282471,
"std": 0.03884059190750122,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_q.weight": {
"min": -0.21938610076904297,
"max": 0.223092183470726,
"mean": -1.1128584446851164e-05,
"std": 0.0392366424202919,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_q.bias": {
"min": -0.11822070181369781,
"max": 0.1703757494688034,
"mean": 0.0002712813438847661,
"std": 0.025094762444496155,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_k.weight": {
"min": -0.2461908757686615,
"max": 0.3006460666656494,
"mean": -3.654139436548576e-05,
"std": 0.03893598914146423,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_k.bias": {
"min": -3.499889850616455,
"max": 3.708961009979248,
"mean": 0.01583799161016941,
"std": 0.781475305557251,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_v.weight": {
"min": -0.21841062605381012,
"max": 0.23724044859409332,
"mean": -1.4060610737942625e-05,
"std": 0.03630809485912323,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_v.bias": {
"min": -0.04710822552442551,
"max": 0.05138855054974556,
"mean": 0.00048449443420395255,
"std": 0.013518092222511768,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.2.to_out.0.weight": {
"min": -0.21374864876270294,
"max": 0.2171718180179596,
"mean": 5.6465847592335194e-05,
"std": 0.03361979499459267,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.10.2.to_out.0.bias": {
"min": -0.21103325486183167,
"max": 0.2311553806066513,
"mean": -0.005100366659462452,
"std": 0.06185431033372879,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.3.g": {
"min": 0.36209091544151306,
"max": 1.0989015102386475,
"mean": 0.6992126703262329,
"std": 0.053264226764440536,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.10.4.ff.0.0.weight": {
"min": -0.23423242568969727,
"max": 0.24471710622310638,
"mean": 0.00046349214971996844,
"std": 0.04127512127161026,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.10.4.ff.0.0.bias": {
"min": -0.09780248254537582,
"max": 0.06824193894863129,
"mean": -0.031424038112163544,
"std": 0.018106156960129738,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.10.4.ff.2.weight": {
"min": -0.301416277885437,
"max": 0.35142549872398376,
"mean": -8.288547542179003e-05,
"std": 0.04028111323714256,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.10.4.ff.2.bias": {
"min": -0.15196339786052704,
"max": 0.14944323897361755,
"mean": 0.0002634537231642753,
"std": 0.023027226328849792,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.1.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_q.weight": {
"min": -0.031249936670064926,
"max": 0.031249839812517166,
"mean": -1.9292721844976768e-05,
"std": 0.01804409734904766,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_q.bias": {
"min": -0.031226642429828644,
"max": 0.03100142627954483,
"mean": -0.0010842883493751287,
"std": 0.01795371063053608,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_k.weight": {
"min": -0.031249966472387314,
"max": 0.031249895691871643,
"mean": 3.5441100862954045e-06,
"std": 0.018044503405690193,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_k.bias": {
"min": -0.031156372278928757,
"max": 0.031184475868940353,
"mean": 0.0003338930255267769,
"std": 0.018065759912133217,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.11.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.11.2.to_out.0.bias": {
"min": -0.0003838505072053522,
"max": 0.00040078736492432654,
"mean": 7.502898370148614e-06,
"std": 0.00012165026419097558,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.3.g": {
"min": 0.9996746778488159,
"max": 1.0017435550689697,
"mean": 1.0005855560302734,
"std": 0.0003091032849624753,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.11.4.ff.0.0.weight": {
"min": -0.03248094022274017,
"max": 0.03274688497185707,
"mean": -1.2105063433409669e-05,
"std": 0.01805892214179039,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.11.4.ff.0.0.bias": {
"min": -0.031171226873993874,
"max": 0.03214619308710098,
"mean": 0.0004906345857307315,
"std": 0.017989112064242363,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.11.4.ff.2.weight": {
"min": -0.0009105296921916306,
"max": 0.001230148016475141,
"mean": 2.7432847673480865e-06,
"std": 0.0001725118636386469,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.11.4.ff.2.bias": {
"min": -0.00036263937363401055,
"max": 0.00041731935925781727,
"mean": 7.396344699373003e-06,
"std": 0.00011976793757639825,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.1.g": {
"min": 0.38287991285324097,
"max": 0.7182613015174866,
"mean": 0.5806185603141785,
"std": 0.03863256797194481,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_q.weight": {
"min": -0.23785854876041412,
"max": 0.19614756107330322,
"mean": 2.640879392856732e-05,
"std": 0.037470731884241104,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_q.bias": {
"min": -0.11855358630418777,
"max": 0.16578993201255798,
"mean": 0.0009884096216410398,
"std": 0.027530910447239876,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_k.weight": {
"min": -0.2458752989768982,
"max": 0.500349223613739,
"mean": -5.065255027147941e-05,
"std": 0.03762831538915634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_k.bias": {
"min": -3.936182975769043,
"max": 3.763556957244873,
"mean": -0.003569458145648241,
"std": 0.6807414293289185,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_v.weight": {
"min": -0.22705353796482086,
"max": 0.251341313123703,
"mean": -1.142405926657375e-05,
"std": 0.03743990138173103,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_v.bias": {
"min": -0.07149660587310791,
"max": 0.08067727833986282,
"mean": -0.0005162369925528765,
"std": 0.015656527131795883,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.2.to_out.0.weight": {
"min": -0.22786642611026764,
"max": 0.2578106224536896,
"mean": -2.8714632207993418e-05,
"std": 0.035426877439022064,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.12.2.to_out.0.bias": {
"min": -0.20022797584533691,
"max": 0.21474605798721313,
"mean": -0.005530310794711113,
"std": 0.0683104544878006,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.3.g": {
"min": 0.4048909544944763,
"max": 1.1872107982635498,
"mean": 0.7378276586532593,
"std": 0.05486491322517395,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.12.4.ff.0.0.weight": {
"min": -0.22101044654846191,
"max": 0.2458520382642746,
"mean": 0.0005211633397266269,
"std": 0.04134228080511093,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.12.4.ff.0.0.bias": {
"min": -0.10363762825727463,
"max": 0.023918237537145615,
"mean": -0.03266144543886185,
"std": 0.018866004422307014,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.12.4.ff.2.weight": {
"min": -0.4487850069999695,
"max": 0.42181524634361267,
"mean": -0.00043266150169074535,
"std": 0.04690360650420189,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.12.4.ff.2.bias": {
"min": -0.25105422735214233,
"max": 0.46941903233528137,
"mean": 0.003198462538421154,
"std": 0.044503308832645416,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.0.weight": {
"min": -0.3172111511230469,
"max": 0.33329516649246216,
"mean": -2.550867066020146e-05,
"std": 0.021290993317961693,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.13.1.g": {
"min": 0.32461607456207275,
"max": 0.6840938329696655,
"mean": 0.5709556341171265,
"std": 0.04454263672232628,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_q.weight": {
"min": -0.16456733644008636,
"max": 0.17394505441188812,
"mean": -4.8416688514407724e-05,
"std": 0.03318499028682709,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_q.bias": {
"min": -0.1864674687385559,
"max": 0.14258594810962677,
"mean": 3.8281112210825086e-05,
"std": 0.029655346646904945,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_k.weight": {
"min": -0.3803539276123047,
"max": 0.2457817941904068,
"mean": -1.002950102702016e-05,
"std": 0.032765936106443405,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_k.bias": {
"min": -3.6502115726470947,
"max": 3.285125494003296,
"mean": -0.014261167496442795,
"std": 0.9845166206359863,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_v.weight": {
"min": -0.23496489226818085,
"max": 0.24718151986598969,
"mean": -1.8079399524140172e-05,
"std": 0.041703000664711,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_v.bias": {
"min": -0.07261228561401367,
"max": 0.15409623086452484,
"mean": 0.0006618116749450564,
"std": 0.02513669617474079,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.2.to_out.0.weight": {
"min": -0.26620712876319885,
"max": 0.24820521473884583,
"mean": -1.5344019629992545e-05,
"std": 0.04014336317777634,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.13.2.to_out.0.bias": {
"min": -0.18921570479869843,
"max": 0.19427257776260376,
"mean": -0.0012257307535037398,
"std": 0.0666433721780777,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.3.g": {
"min": 0.32903727889060974,
"max": 0.9973482489585876,
"mean": 0.7190757393836975,
"std": 0.051972683519124985,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.13.4.ff.0.0.weight": {
"min": -0.23141932487487793,
"max": 0.24504587054252625,
"mean": 0.0001826788648031652,
"std": 0.04090685769915581,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.13.4.ff.0.0.bias": {
"min": -0.11396601796150208,
"max": 0.01875537633895874,
"mean": -0.04246020317077637,
"std": 0.018833719193935394,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.13.4.ff.2.weight": {
"min": -0.38934653997421265,
"max": 0.4067343473434448,
"mean": -2.1657660909113474e-05,
"std": 0.04854125902056694,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.13.4.ff.2.bias": {
"min": -0.6919497847557068,
"max": 0.411848247051239,
"mean": 0.0008590769721195102,
"std": 0.06023983284831047,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.0.weight": {
"min": -0.000941734469961375,
"max": 1.0006029605865479,
"mean": 0.00048819385119713843,
"std": 0.02209211327135563,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.14.1.g": {
"min": 1.0,
"max": 1.0,
"mean": 1.0,
"std": 0.0,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_q.weight": {
"min": -0.031249970197677612,
"max": 0.031249817460775375,
"mean": -2.1022657165303826e-05,
"std": 0.018035436049103737,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_q.bias": {
"min": -0.03122086077928543,
"max": 0.031233571469783783,
"mean": -0.0006771883927285671,
"std": 0.01782997138798237,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_k.weight": {
"min": -0.03124987706542015,
"max": 0.031249921768903732,
"mean": -8.839062502374873e-06,
"std": 0.01803446188569069,
"sparsity": 9.5367431640625e-07,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_k.bias": {
"min": -0.031232360750436783,
"max": 0.031245984137058258,
"mean": -0.0007298353011719882,
"std": 0.017944591119885445,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_v.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_v.bias": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024
]
},
"transformer.layers.14.2.to_out.0.weight": {
"min": 0.0,
"max": 0.0,
"mean": 0.0,
"std": 0.0,
"sparsity": 1.0,
"shape": [
1024,
1024
]
},
"transformer.layers.14.2.to_out.0.bias": {
"min": -0.0003224269312340766,
"max": 0.0002993023081216961,
"mean": 6.5217936935368925e-06,
"std": 0.0001044638265739195,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.3.g": {
"min": 0.9996813535690308,
"max": 1.0015599727630615,
"mean": 1.000339150428772,
"std": 0.0002295201556989923,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.14.4.ff.0.0.weight": {
"min": -0.032516807317733765,
"max": 0.03226118162274361,
"mean": 4.161014203418745e-06,
"std": 0.018049873411655426,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.14.4.ff.0.0.bias": {
"min": -0.031123636290431023,
"max": 0.03165753185749054,
"mean": 0.0003850722569040954,
"std": 0.018070610240101814,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.14.4.ff.2.weight": {
"min": -0.0009010994690470397,
"max": 0.0009490308002568781,
"mean": 2.8105064302508254e-06,
"std": 0.00016459461767226458,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.14.4.ff.2.bias": {
"min": -0.00032089874730445445,
"max": 0.00031345486058853567,
"mean": 6.42746908852132e-06,
"std": 0.00010272208601236343,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.0.weight": {
"min": -0.23485393822193146,
"max": 0.27267447113990784,
"mean": 6.709969511575764e-06,
"std": 0.018812596797943115,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.15.1.g": {
"min": 0.32135409116744995,
"max": 0.6922963857650757,
"mean": 0.5815727710723877,
"std": 0.045748595148324966,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_q.weight": {
"min": -0.1818080097436905,
"max": 0.19750945270061493,
"mean": -1.1748516044463031e-05,
"std": 0.03318887948989868,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_q.bias": {
"min": -0.16036057472229004,
"max": 0.12932586669921875,
"mean": -0.0010664488654583693,
"std": 0.03411008045077324,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_k.weight": {
"min": -0.33175674080848694,
"max": 0.31088003516197205,
"mean": -1.0311603546142578e-05,
"std": 0.0322394073009491,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_k.bias": {
"min": -7.791174411773682,
"max": 8.749550819396973,
"mean": 0.09336872398853302,
"std": 1.6178374290466309,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_v.weight": {
"min": -0.23367103934288025,
"max": 0.2417406141757965,
"mean": 4.146722494624555e-05,
"std": 0.04086144268512726,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_v.bias": {
"min": -0.07598260790109634,
"max": 0.06560970842838287,
"mean": 0.0004800831666216254,
"std": 0.019395504146814346,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.2.to_out.0.weight": {
"min": -0.24560654163360596,
"max": 0.23375561833381653,
"mean": -2.9877701308578253e-06,
"std": 0.03943600133061409,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.15.2.to_out.0.bias": {
"min": -0.1627652794122696,
"max": 0.16063357889652252,
"mean": 0.0016337584238499403,
"std": 0.06525594741106033,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.3.g": {
"min": 0.5568895936012268,
"max": 0.9421334266662598,
"mean": 0.7127605080604553,
"std": 0.03978221118450165,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.15.4.ff.0.0.weight": {
"min": -0.22847090661525726,
"max": 0.25493934750556946,
"mean": -4.550522498902865e-05,
"std": 0.040581200271844864,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.15.4.ff.0.0.bias": {
"min": -0.1344706267118454,
"max": 0.022221069782972336,
"mean": -0.04133939743041992,
"std": 0.01835877075791359,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.15.4.ff.2.weight": {
"min": -0.4210115969181061,
"max": 0.3920403718948364,
"mean": -4.534296749625355e-06,
"std": 0.047791384160518646,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.15.4.ff.2.bias": {
"min": -0.6062420010566711,
"max": 0.6502339243888855,
"mean": 0.0015842054272070527,
"std": 0.05679100751876831,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.0.weight": {
"min": -0.252038836479187,
"max": 0.32106301188468933,
"mean": -6.296660103544127e-06,
"std": 0.019615648314356804,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.16.1.g": {
"min": 0.35961171984672546,
"max": 0.6809778809547424,
"mean": 0.5706169605255127,
"std": 0.042782142758369446,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_q.weight": {
"min": -0.22040791809558868,
"max": 0.17709863185882568,
"mean": -3.522756742313504e-05,
"std": 0.03430448845028877,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_q.bias": {
"min": -0.16271811723709106,
"max": 0.23246890306472778,
"mean": 0.0003684491675812751,
"std": 0.03280302509665489,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_k.weight": {
"min": -0.26368996500968933,
"max": 0.23957668244838715,
"mean": -5.283607606543228e-05,
"std": 0.03390355408191681,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_k.bias": {
"min": -4.8473591804504395,
"max": 5.083388805389404,
"mean": 0.04383918642997742,
"std": 1.2279300689697266,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_v.weight": {
"min": -0.24628077447414398,
"max": 0.2501535415649414,
"mean": 7.219994586193934e-05,
"std": 0.04399203881621361,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_v.bias": {
"min": -0.062493205070495605,
"max": 0.054467517882585526,
"mean": 0.0006505983183160424,
"std": 0.01718413643538952,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.2.to_out.0.weight": {
"min": -0.2860679030418396,
"max": 0.27162545919418335,
"mean": -4.9951679102377966e-05,
"std": 0.04299019277095795,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.16.2.to_out.0.bias": {
"min": -0.16042187809944153,
"max": 0.1700378805398941,
"mean": -0.0028904015198349953,
"std": 0.05927493795752525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.3.g": {
"min": 0.5196736454963684,
"max": 0.931270182132721,
"mean": 0.7133467197418213,
"std": 0.03808481991291046,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.16.4.ff.0.0.weight": {
"min": -0.2380017340183258,
"max": 0.24893511831760406,
"mean": 0.00046494320849888027,
"std": 0.04046032205224037,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.16.4.ff.0.0.bias": {
"min": -0.1442948430776596,
"max": 0.041139233857393265,
"mean": -0.03967897593975067,
"std": 0.020518682897090912,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.16.4.ff.2.weight": {
"min": -0.5318877696990967,
"max": 0.5818965435028076,
"mean": 6.336260412354022e-06,
"std": 0.048867613077163696,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.16.4.ff.2.bias": {
"min": -0.5183113813400269,
"max": 0.4925517439842224,
"mean": 0.0023608217015862465,
"std": 0.053406503051519394,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.0.weight": {
"min": -0.2738274037837982,
"max": 0.31547796726226807,
"mean": 1.8216255739389453e-06,
"std": 0.02005232311785221,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.17.1.g": {
"min": 0.3659067749977112,
"max": 0.7100387215614319,
"mean": 0.5930584073066711,
"std": 0.04572707787156105,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_q.weight": {
"min": -0.21076832711696625,
"max": 0.19927603006362915,
"mean": 3.0815259378869087e-05,
"std": 0.03487056866288185,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_q.bias": {
"min": -0.186960831284523,
"max": 0.20310287177562714,
"mean": 0.0009555225260555744,
"std": 0.03147275000810623,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_k.weight": {
"min": -0.28951019048690796,
"max": 0.33969932794570923,
"mean": -4.744817124446854e-05,
"std": 0.034591346979141235,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_k.bias": {
"min": -3.8711647987365723,
"max": 3.3820366859436035,
"mean": 0.01444312371313572,
"std": 0.8576698899269104,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_v.weight": {
"min": -0.2244085818529129,
"max": 0.249923974275589,
"mean": -3.961446964240167e-06,
"std": 0.04223531484603882,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_v.bias": {
"min": -0.05502909794449806,
"max": 0.04645157977938652,
"mean": -2.0665102056227624e-05,
"std": 0.01583181880414486,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.2.to_out.0.weight": {
"min": -0.2927229106426239,
"max": 0.2906007766723633,
"mean": -7.488439223379828e-06,
"std": 0.04195013642311096,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.17.2.to_out.0.bias": {
"min": -0.12459567189216614,
"max": 0.25878894329071045,
"mean": -0.0032436519395560026,
"std": 0.053140122443437576,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.3.g": {
"min": 0.4563407599925995,
"max": 0.8428970575332642,
"mean": 0.7054145932197571,
"std": 0.03490997478365898,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.17.4.ff.0.0.weight": {
"min": -0.5114501714706421,
"max": 0.3482079803943634,
"mean": 0.00034245854476466775,
"std": 0.04020575433969498,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.17.4.ff.0.0.bias": {
"min": -0.18575434386730194,
"max": 0.03953104466199875,
"mean": -0.03936902433633804,
"std": 0.021325672045350075,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.17.4.ff.2.weight": {
"min": -0.5437595248222351,
"max": 0.5556712746620178,
"mean": -7.024264050414786e-05,
"std": 0.05074309557676315,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.17.4.ff.2.bias": {
"min": -0.5109111666679382,
"max": 0.6631372570991516,
"mean": 0.002439212054014206,
"std": 0.049490757286548615,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.0.weight": {
"min": -0.33253294229507446,
"max": 0.2652721107006073,
"mean": 3.378802830411587e-06,
"std": 0.019389795139431953,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.18.1.g": {
"min": 0.3220270276069641,
"max": 0.7649413347244263,
"mean": 0.6509413719177246,
"std": 0.045111026614904404,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_q.weight": {
"min": -0.2494993954896927,
"max": 0.21881401538848877,
"mean": -2.360827238589991e-06,
"std": 0.03650495037436485,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_q.bias": {
"min": -0.3266308009624481,
"max": 0.28657323122024536,
"mean": -0.0006807027384638786,
"std": 0.038520634174346924,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_k.weight": {
"min": -0.30951929092407227,
"max": 0.36978626251220703,
"mean": 6.48990971967578e-05,
"std": 0.036245379596948624,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_k.bias": {
"min": -4.710280895233154,
"max": 5.798713684082031,
"mean": 0.037927284836769104,
"std": 1.4116240739822388,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_v.weight": {
"min": -0.22114244103431702,
"max": 0.20574785768985748,
"mean": -7.537077181041241e-05,
"std": 0.04249110445380211,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_v.bias": {
"min": -0.07735679298639297,
"max": 0.05145302414894104,
"mean": -0.0009192783036269248,
"std": 0.016400594264268875,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.2.to_out.0.weight": {
"min": -0.3307357728481293,
"max": 0.32934609055519104,
"mean": -4.647547484637471e-06,
"std": 0.042797382920980453,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.18.2.to_out.0.bias": {
"min": -0.28440576791763306,
"max": 0.11188910901546478,
"mean": -0.0012069176882505417,
"std": 0.0469915047287941,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.3.g": {
"min": 0.4862346351146698,
"max": 0.8851982355117798,
"mean": 0.7373509407043457,
"std": 0.03795893117785454,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.18.4.ff.0.0.weight": {
"min": -0.3612706959247589,
"max": 0.27453744411468506,
"mean": 5.114857412991114e-05,
"std": 0.04065178707242012,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.18.4.ff.0.0.bias": {
"min": -0.24725216627120972,
"max": 0.04655319079756737,
"mean": -0.03925145044922829,
"std": 0.023245742544531822,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.18.4.ff.2.weight": {
"min": -0.625215470790863,
"max": 0.5962166786193848,
"mean": -5.8090816310141236e-05,
"std": 0.05312598869204521,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.18.4.ff.2.bias": {
"min": -0.7085027694702148,
"max": 0.2653276026248932,
"mean": 0.0009165835799649358,
"std": 0.0511946901679039,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.0.weight": {
"min": -0.34328368306159973,
"max": 0.3035609722137451,
"mean": 1.4504064438369824e-07,
"std": 0.019138522446155548,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.19.1.g": {
"min": 0.3498779833316803,
"max": 0.7813707590103149,
"mean": 0.6387293338775635,
"std": 0.049000099301338196,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_q.weight": {
"min": -0.20522303879261017,
"max": 0.20651094615459442,
"mean": -5.9693807997973636e-05,
"std": 0.03769965097308159,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_q.bias": {
"min": -0.25792619585990906,
"max": 0.2676540017127991,
"mean": -0.0004065552493557334,
"std": 0.044568419456481934,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_k.weight": {
"min": -0.3535814583301544,
"max": 0.32190999388694763,
"mean": -7.394870408461429e-06,
"std": 0.037208281457424164,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_k.bias": {
"min": -5.253505706787109,
"max": 4.198240280151367,
"mean": -0.026390478014945984,
"std": 1.0056747198104858,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_v.weight": {
"min": -0.2384454905986786,
"max": 0.24342015385627747,
"mean": -2.5527655452606268e-05,
"std": 0.043215684592723846,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_v.bias": {
"min": -0.06227009370923042,
"max": 0.05663022771477699,
"mean": 0.0003446021000854671,
"std": 0.01414022222161293,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.2.to_out.0.weight": {
"min": -0.43697887659072876,
"max": 0.3737882673740387,
"mean": 1.4649482181994244e-05,
"std": 0.04412706196308136,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.19.2.to_out.0.bias": {
"min": -0.09632225334644318,
"max": 0.1757834255695343,
"mean": -0.0006590378470718861,
"std": 0.03513453155755997,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.3.g": {
"min": 0.4219363331794739,
"max": 1.0674819946289062,
"mean": 0.7483711838722229,
"std": 0.041829537600278854,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.19.4.ff.0.0.weight": {
"min": -0.26578643918037415,
"max": 0.29607900977134705,
"mean": -7.925635145511478e-05,
"std": 0.04081210494041443,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.19.4.ff.0.0.bias": {
"min": -0.18497370183467865,
"max": 0.04346155747771263,
"mean": -0.03679885342717171,
"std": 0.025566671043634415,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.19.4.ff.2.weight": {
"min": -0.45727846026420593,
"max": 0.48611682653427124,
"mean": 4.68605212518014e-05,
"std": 0.05422008037567139,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.19.4.ff.2.bias": {
"min": -0.285878986120224,
"max": 0.5506833791732788,
"mean": -0.0008855935884639621,
"std": 0.047791752964258194,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.0.weight": {
"min": -0.2927459478378296,
"max": 0.32270148396492004,
"mean": 6.155781647976255e-06,
"std": 0.019972333684563637,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.20.1.g": {
"min": 0.29097816348075867,
"max": 0.7588945627212524,
"mean": 0.6507570743560791,
"std": 0.05195188894867897,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_q.weight": {
"min": -0.24343979358673096,
"max": 0.2611932158470154,
"mean": -5.595570200966904e-06,
"std": 0.039616428315639496,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_q.bias": {
"min": -0.2672193646430969,
"max": 0.19968828558921814,
"mean": -0.0008741158526390791,
"std": 0.051719244569540024,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_k.weight": {
"min": -0.2713148593902588,
"max": 0.25280529260635376,
"mean": 4.686854481406044e-06,
"std": 0.03871333599090576,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_k.bias": {
"min": -12.945391654968262,
"max": 15.922587394714355,
"mean": 0.0331900492310524,
"std": 1.9867922067642212,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_v.weight": {
"min": -0.20660938322544098,
"max": 0.22584253549575806,
"mean": -7.262543658725917e-05,
"std": 0.04055970162153244,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_v.bias": {
"min": -0.06933361291885376,
"max": 0.06314393132925034,
"mean": 0.00014905043644830585,
"std": 0.014740395359694958,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.2.to_out.0.weight": {
"min": -0.46516552567481995,
"max": 0.3203747570514679,
"mean": 1.989086922549177e-05,
"std": 0.04059458523988724,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.20.2.to_out.0.bias": {
"min": -0.06398216634988785,
"max": 0.11521662026643753,
"mean": 0.0011892176698893309,
"std": 0.02469474822282791,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.3.g": {
"min": 0.37489306926727295,
"max": 0.9301723837852478,
"mean": 0.7509260177612305,
"std": 0.04003360494971275,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.20.4.ff.0.0.weight": {
"min": -0.27877017855644226,
"max": 0.27262061834335327,
"mean": -0.00016865786164999008,
"std": 0.0410030372440815,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.20.4.ff.0.0.bias": {
"min": -0.19846785068511963,
"max": 0.05112157389521599,
"mean": -0.032006848603487015,
"std": 0.02506233938038349,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.20.4.ff.2.weight": {
"min": -0.6571894884109497,
"max": 0.5354637503623962,
"mean": -4.8520763812121004e-05,
"std": 0.05285634472966194,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.20.4.ff.2.bias": {
"min": -0.19253292679786682,
"max": 0.5813104510307312,
"mean": -0.0005173450335860252,
"std": 0.04104470834136009,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.0.weight": {
"min": -0.41767504811286926,
"max": 0.3719256818294525,
"mean": 6.585116807400482e-06,
"std": 0.02162640169262886,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.21.1.g": {
"min": 0.21444188058376312,
"max": 0.7454288601875305,
"mean": 0.6494399309158325,
"std": 0.054196760058403015,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_q.weight": {
"min": -0.20942556858062744,
"max": 0.19570672512054443,
"mean": 4.021516360808164e-05,
"std": 0.03946828842163086,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_q.bias": {
"min": -0.32898303866386414,
"max": 0.2592002749443054,
"mean": -0.0032279789447784424,
"std": 0.05622360482811928,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_k.weight": {
"min": -0.2054453343153,
"max": 0.2543545663356781,
"mean": 5.45132061233744e-05,
"std": 0.03857067599892616,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_k.bias": {
"min": -6.233641624450684,
"max": 6.921432971954346,
"mean": 0.04828529804944992,
"std": 1.3836402893066406,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_v.weight": {
"min": -0.20949925482273102,
"max": 0.2304454892873764,
"mean": -4.72849160360056e-06,
"std": 0.041318491101264954,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_v.bias": {
"min": -0.04375026375055313,
"max": 0.03585176169872284,
"mean": -5.88857801631093e-07,
"std": 0.012790623120963573,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.2.to_out.0.weight": {
"min": -0.39803647994995117,
"max": 0.34512725472450256,
"mean": -5.491710908245295e-05,
"std": 0.042394764721393585,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.21.2.to_out.0.bias": {
"min": -0.054978147149086,
"max": 0.06269973516464233,
"mean": 0.0003556903393473476,
"std": 0.018663441762328148,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.3.g": {
"min": 0.35058680176734924,
"max": 1.043295979499817,
"mean": 0.789494514465332,
"std": 0.04858649522066116,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.21.4.ff.0.0.weight": {
"min": -0.33317434787750244,
"max": 0.3864516317844391,
"mean": -0.00016881646297406405,
"std": 0.041488684713840485,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.21.4.ff.0.0.bias": {
"min": -0.15732650458812714,
"max": 0.058728814125061035,
"mean": -0.03181058540940285,
"std": 0.025098087266087532,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.21.4.ff.2.weight": {
"min": -0.6958801746368408,
"max": 0.46852678060531616,
"mean": -8.982194412965328e-05,
"std": 0.05180330574512482,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.21.4.ff.2.bias": {
"min": -0.24772712588310242,
"max": 0.32808512449264526,
"mean": -0.0002515119267627597,
"std": 0.04140802100300789,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.0.weight": {
"min": -0.28731903433799744,
"max": 0.3503708243370056,
"mean": -2.625113665999379e-06,
"std": 0.024243580177426338,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.22.1.g": {
"min": 0.19668713212013245,
"max": 0.7778334617614746,
"mean": 0.670162558555603,
"std": 0.05853449925780296,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_q.weight": {
"min": -0.2283114343881607,
"max": 0.23055444657802582,
"mean": -2.0571733330143616e-05,
"std": 0.04044181853532791,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_q.bias": {
"min": -0.2195570170879364,
"max": 0.24048519134521484,
"mean": 0.000782210670877248,
"std": 0.055770643055438995,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_k.weight": {
"min": -0.21605147421360016,
"max": 0.22674262523651123,
"mean": -7.179281237768009e-05,
"std": 0.03937681019306183,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_k.bias": {
"min": -8.892273902893066,
"max": 9.054671287536621,
"mean": -0.0012077325955033302,
"std": 1.846124529838562,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_v.weight": {
"min": -0.2689066231250763,
"max": 0.2583616375923157,
"mean": 4.3370266212150455e-05,
"std": 0.03841203823685646,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_v.bias": {
"min": -0.05771247297525406,
"max": 0.05783558264374733,
"mean": 0.00035597707028500736,
"std": 0.014716549776494503,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.2.to_out.0.weight": {
"min": -0.2647928297519684,
"max": 0.28871840238571167,
"mean": -6.220719660632312e-05,
"std": 0.0390787236392498,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.22.2.to_out.0.bias": {
"min": -0.04365166649222374,
"max": 0.037368953227996826,
"mean": -8.94215190783143e-05,
"std": 0.013351045548915863,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.3.g": {
"min": 0.33930352330207825,
"max": 1.090523362159729,
"mean": 0.8638416528701782,
"std": 0.06374476104974747,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.22.4.ff.0.0.weight": {
"min": -0.4229956567287445,
"max": 0.41935035586357117,
"mean": 0.00031358242267742753,
"std": 0.04351169988512993,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.22.4.ff.0.0.bias": {
"min": -0.2143605649471283,
"max": 0.17033977806568146,
"mean": -0.029430482536554337,
"std": 0.031879011541604996,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.22.4.ff.2.weight": {
"min": -0.5980925559997559,
"max": 0.5593904852867126,
"mean": -0.0001523983955848962,
"std": 0.05345866456627846,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.22.4.ff.2.bias": {
"min": -0.17843037843704224,
"max": 0.3764672875404358,
"mean": 0.0013608136214315891,
"std": 0.037283699959516525,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.0.weight": {
"min": -0.3941720128059387,
"max": 0.3687548339366913,
"mean": 3.7372221413534135e-05,
"std": 0.02862183377146721,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.23.1.g": {
"min": 0.2906048893928528,
"max": 0.825853168964386,
"mean": 0.7055732607841492,
"std": 0.0677838996052742,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_q.weight": {
"min": -0.9263502359390259,
"max": 1.027148962020874,
"mean": -2.6785823138197884e-05,
"std": 0.04763893038034439,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_q.bias": {
"min": -0.8774253129959106,
"max": 0.8142860531806946,
"mean": -0.0003061135357711464,
"std": 0.09545911848545074,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_k.weight": {
"min": -0.2697736918926239,
"max": 0.24071107804775238,
"mean": -2.2601629098062404e-05,
"std": 0.038958579301834106,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_k.bias": {
"min": -23.70609474182129,
"max": 22.81615447998047,
"mean": -0.09178254753351212,
"std": 4.064568042755127,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_v.weight": {
"min": -0.22739385068416595,
"max": 0.24493008852005005,
"mean": -2.535741987230722e-05,
"std": 0.03864453360438347,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_v.bias": {
"min": -0.06026393920183182,
"max": 0.045535702258348465,
"mean": -0.00013921607751399279,
"std": 0.014681815169751644,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.2.to_out.0.weight": {
"min": -0.3383011817932129,
"max": 0.3741171360015869,
"mean": 6.997803211561404e-06,
"std": 0.040823448449373245,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.23.2.to_out.0.bias": {
"min": -0.046280112117528915,
"max": 0.19523115456104279,
"mean": 0.00027006896561942995,
"std": 0.01355893723666668,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.3.g": {
"min": 0.3735462725162506,
"max": 1.1277151107788086,
"mean": 0.8900589942932129,
"std": 0.06382670253515244,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.23.4.ff.0.0.weight": {
"min": -0.4478131830692291,
"max": 0.5424441695213318,
"mean": 2.4745060727582313e-05,
"std": 0.04557563737034798,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.23.4.ff.0.0.bias": {
"min": -0.22360379993915558,
"max": 0.08794356882572174,
"mean": -0.03199389576911926,
"std": 0.03773387894034386,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.23.4.ff.2.weight": {
"min": -0.7249262928962708,
"max": 0.6877928376197815,
"mean": 3.6950204957975075e-05,
"std": 0.051789939403533936,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.23.4.ff.2.bias": {
"min": -0.17425872385501862,
"max": 0.21810372173786163,
"mean": 3.0209601391106844e-05,
"std": 0.03174462914466858,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.0.weight": {
"min": -0.3392157554626465,
"max": 0.3738991320133209,
"mean": 4.299447755329311e-05,
"std": 0.03414613753557205,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.24.1.g": {
"min": 0.3178655207157135,
"max": 1.2844390869140625,
"mean": 0.6014401912689209,
"std": 0.08323848247528076,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_q.weight": {
"min": -0.2828904390335083,
"max": 0.260010302066803,
"mean": -3.007857230841182e-06,
"std": 0.03598371520638466,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_q.bias": {
"min": -0.2351931631565094,
"max": 0.20519772171974182,
"mean": 0.00022795653785578907,
"std": 0.055979955941438675,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_k.weight": {
"min": -0.43529582023620605,
"max": 0.32459068298339844,
"mean": 2.450653482810594e-05,
"std": 0.03413282707333565,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_k.bias": {
"min": -5.542441368103027,
"max": 7.307634353637695,
"mean": -0.007349876686930656,
"std": 0.6985355019569397,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_v.weight": {
"min": -0.3433660864830017,
"max": 0.3625560700893402,
"mean": 0.00010314527025911957,
"std": 0.04783623665571213,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_v.bias": {
"min": -0.07354722917079926,
"max": 0.060343291610479355,
"mean": 0.0009371445048600435,
"std": 0.014936422929167747,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.2.to_out.0.weight": {
"min": -0.25582820177078247,
"max": 0.286111980676651,
"mean": 4.655210432247259e-06,
"std": 0.04156283661723137,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.24.2.to_out.0.bias": {
"min": -0.05514800176024437,
"max": 0.06263813376426697,
"mean": 0.0001386886287946254,
"std": 0.007160879671573639,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.3.g": {
"min": 0.4938517212867737,
"max": 1.2188584804534912,
"mean": 1.0133963823318481,
"std": 0.11724550276994705,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.24.4.ff.0.0.weight": {
"min": -1.093719720840454,
"max": 1.0471616983413696,
"mean": -4.925714529235847e-05,
"std": 0.05241731181740761,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.24.4.ff.0.0.bias": {
"min": -0.2243891805410385,
"max": 0.172992542386055,
"mean": -0.027224872261285782,
"std": 0.03628592565655708,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.24.4.ff.2.weight": {
"min": -0.8836102485656738,
"max": 0.9222370386123657,
"mean": -0.0001438588951714337,
"std": 0.053294114768505096,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.24.4.ff.2.bias": {
"min": -0.17069175839424133,
"max": 0.37931114435195923,
"mean": 0.003359442111104727,
"std": 0.03984633460640907,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.0.weight": {
"min": -0.777143120765686,
"max": 0.7232267260551453,
"mean": 1.830433029681444e-05,
"std": 0.0461735762655735,
"sparsity": 0.0,
"shape": [
1024,
2048
]
},
"transformer.layers.25.1.g": {
"min": 0.3386678695678711,
"max": 1.4252641201019287,
"mean": 0.9481973648071289,
"std": 0.20639142394065857,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_q.weight": {
"min": -1.746235728263855,
"max": 1.7046191692352295,
"mean": 0.00022743589943274856,
"std": 0.1587381213903427,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_q.bias": {
"min": -1.1972129344940186,
"max": 1.0979515314102173,
"mean": -0.00952577032148838,
"std": 0.2035541981458664,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_k.weight": {
"min": -0.4209991693496704,
"max": 0.42664653062820435,
"mean": 6.461775046773255e-05,
"std": 0.04803095757961273,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_k.bias": {
"min": -19.71938133239746,
"max": 19.514814376831055,
"mean": -0.24804288148880005,
"std": 4.770266532897949,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_v.weight": {
"min": -0.32366812229156494,
"max": 0.43827319145202637,
"mean": -1.2008969861199148e-05,
"std": 0.04616396129131317,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_v.bias": {
"min": -0.03389401733875275,
"max": 0.03695628046989441,
"mean": 0.0006402541184797883,
"std": 0.012914549559354782,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.2.to_out.0.weight": {
"min": -0.7030304074287415,
"max": 0.6659538745880127,
"mean": 4.320529478718527e-05,
"std": 0.05788206309080124,
"sparsity": 0.0,
"shape": [
1024,
1024
]
},
"transformer.layers.25.2.to_out.0.bias": {
"min": -0.07218055427074432,
"max": 0.0675114244222641,
"mean": -0.0001346912613371387,
"std": 0.012894386425614357,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.3.g": {
"min": 0.3805386424064636,
"max": 1.3893085718154907,
"mean": 1.0665242671966553,
"std": 0.21952925622463226,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.layers.25.4.ff.0.0.weight": {
"min": -0.6161316633224487,
"max": 0.717426061630249,
"mean": 0.00011223374167457223,
"std": 0.0580313578248024,
"sparsity": 0.0,
"shape": [
4096,
1024
]
},
"transformer.layers.25.4.ff.0.0.bias": {
"min": -0.21904653310775757,
"max": 0.22452397644519806,
"mean": 0.006222008261829615,
"std": 0.049658045172691345,
"sparsity": 0.0,
"shape": [
4096
]
},
"transformer.layers.25.4.ff.2.weight": {
"min": -0.6296318769454956,
"max": 0.8893842101097107,
"mean": 1.2104990673833527e-05,
"std": 0.02354114130139351,
"sparsity": 0.0,
"shape": [
1024,
4096
]
},
"transformer.layers.25.4.ff.2.bias": {
"min": -0.5061390995979309,
"max": 0.473175585269928,
"mean": -0.003011696506291628,
"std": 0.06919368356466293,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.norm_out.g": {
"min": 0.5380294322967529,
"max": 1.1777888536453247,
"mean": 0.7825304865837097,
"std": 0.09833591431379318,
"sparsity": 0.0,
"shape": [
1024
]
},
"transformer.proj_out.weight": {
"min": -0.26662442088127136,
"max": 0.21249151229858398,
"mean": -0.00022446915681939572,
"std": 0.054007817059755325,
"sparsity": 0.0,
"shape": [
100,
1024
]
},
"transformer.proj_out.bias": {
"min": -0.23786024749279022,
"max": 0.014854340814054012,
"mean": -0.04389730468392372,
"std": 0.03425038233399391,
"sparsity": 0.0,
"shape": [
100
]
}
}
}